From 70e68cac00126d6346b8dec0dd3656568e48e38d Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Thu, 16 May 2024 14:27:36 +0200 Subject: [PATCH 01/28] implement semantic type for hmmer db + tests --- q2_types/reference_db/_format.py | 45 ++++++++++++++++++- q2_types/reference_db/_type.py | 6 ++- .../tests/data/hmmer/bacteria/a.fa | 6 +++ .../tests/data/hmmer/bacteria/b.fa | 12 +++++ .../tests/data/hmmer/bacteria/b2.fa | 10 +++++ .../data/hmmer/bacteria/bacteria.hmm.h3f | 0 .../data/hmmer/bacteria/bacteria.hmm.h3i | 0 .../data/hmmer/bacteria/bacteria.hmm.h3m | 0 .../data/hmmer/bacteria/bacteria.hmm.h3p | 0 .../data/hmmer/bacteria/bacteria.hmm.idmap | 0 q2_types/reference_db/tests/test_format.py | 42 ++++++++++++++--- q2_types/reference_db/tests/test_type.py | 13 +++++- 12 files changed, 123 insertions(+), 11 deletions(-) create mode 100644 q2_types/reference_db/tests/data/hmmer/bacteria/a.fa create mode 100644 q2_types/reference_db/tests/data/hmmer/bacteria/b.fa create mode 100644 q2_types/reference_db/tests/data/hmmer/bacteria/b2.fa create mode 100644 q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3f create mode 100644 q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3i create mode 100644 q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3m create mode 100644 q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3p create mode 100644 q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.idmap diff --git a/q2_types/reference_db/_format.py b/q2_types/reference_db/_format.py index ad6697f2..ecc2e9ee 100644 --- a/q2_types/reference_db/_format.py +++ b/q2_types/reference_db/_format.py @@ -14,9 +14,11 @@ from q2_types.plugin_setup import plugin from q2_types.reference_db._type import ( ReferenceDB, Eggnog, Diamond, NCBITaxonomy, - EggnogProteinSequences + EggnogProteinSequences, HMMER +) +from q2_types.feature_data import ( + MixedCaseProteinFASTAFormat, ProteinFASTAFormat ) -from q2_types.feature_data import 
MixedCaseProteinFASTAFormat class EggnogRefTextFileFmt(model.TextFileFormat): @@ -294,3 +296,42 @@ class EggnogProteinSequencesDirFmt(model.DirectoryFormat): plugin.register_formats(EggnogProteinSequencesDirFmt) plugin.register_semantic_type_to_format(ReferenceDB[EggnogProteinSequences], EggnogProteinSequencesDirFmt) + + +class HmmerBinaryFileFmt(model.BinaryFileFormat): + def _validate_(self, level): + pass + + +class HmmerDirFmt(model.DirectoryFormat): + """ + The .h3m file contains the profile HMMs + and their annotation in a binary format. The .h3i file is an + SSI index for the .h3m file. The .h3f file contains + precomputed data structures for the fast heuristic filter + (the MSV filter). The .h3p file contains precomputed data + structures for the rest of each profile. + + - Dont know what the idmap file is for. + - Also dont know why there are fasta files but they are needed for + eggnog-hmmer-search action in q2-moshpit. + """ + h3m = model.File(r'.*\.hmm\.h3m', format=HmmerBinaryFileFmt) + h3i = model.File(r'.*\.hmm\.h3i', format=HmmerBinaryFileFmt) + h3f = model.File(r'.*\.hmm\.h3f', format=HmmerBinaryFileFmt) + h3p = model.File(r'.*\.hmm\.h3p', format=HmmerBinaryFileFmt) + idmap = model.File(r'.*\.hmm\.idmap', format=HmmerBinaryFileFmt) + fasta_files = model.FileCollection( + r'.*\.(fa|fasta|faa)$', + format=ProteinFASTAFormat, + optional=False, + ) + + @fasta_files.set_path_maker + def fasta_files_path_maker(self, name): + return str(name) + + +plugin.register_formats(HmmerDirFmt) +plugin.register_semantic_type_to_format(ReferenceDB[HMMER], + HmmerDirFmt) diff --git a/q2_types/reference_db/_type.py b/q2_types/reference_db/_type.py index 0f773a79..0362bf38 100644 --- a/q2_types/reference_db/_type.py +++ b/q2_types/reference_db/_type.py @@ -19,7 +19,9 @@ EggnogProteinSequences = SemanticType( 'EggnogProteinSequences', variant_of=ReferenceDB.field['type'] ) - +HMMER = SemanticType( + 'HMMER', variant_of=ReferenceDB.field['type'] +) 
plugin.register_semantic_types( - ReferenceDB, Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences + ReferenceDB, Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences, HMMER ) diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/a.fa b/q2_types/reference_db/tests/data/hmmer/bacteria/a.fa new file mode 100644 index 00000000..5abab3f7 --- /dev/null +++ b/q2_types/reference_db/tests/data/hmmer/bacteria/a.fa @@ -0,0 +1,6 @@ +>234831.PSM_A0151 +MKLLYWLDEWLTLSDNEKQAKLPTSGGDLLGDVYVKYHFVDLNNPLLFTFSPAGTDVKERDLNEDFAPWGYHLAQKQNVNIIAFQHLGKSNWFRNRNLIFFIEQLSTLLSPFETKLGYGLSRGGFAVGAFAKLLKLDKVLLFHPVSTKNKLIAPWDDRSSTDIAQQYDWQGDYHDLDLGDAQGYIIYDPTNCIDRQHAKRYKQLTHLRVFGMGHGTHATYLNKFGFYKQVAIDFIANQQIDIAQFRLQTKTLRLKEDYYKKLNKANANSPHRQALLSTAHTILIDEKAAHVQEHQEKIDIQPLIDVAIKHQDENPNDAIKLLEVAQQLAPDDPLVEHKLRQLE +>225849.swp_4415 +MLSPFSIYLEQIQEQLALLLFNQTVRLQVDDVVIQYHIFDTAQPLMITFPPGSEAFSDSDLIENKTPWGYDFFAKRRMNVISFNHIGKGNYFTSNELVIFTEKLGKHLDCFCERIGYGVSRGGFATSMFSKNLRLDRALLLMPISTYDISIASWDPKVREAAQHLNASPDSADCDIPLTIIYDPLYKPDSMHMKRFQSCRVRFPLPGVGHRIPRALLQLGILKSTILQYRQQQIDPASFFLKIRKRRTLSFFYRGLQSCNNTSRFSLRSRVILFHRIQYHINHLDIDPKKIYQQLSESIQKRCFYTTERIFGHGYKNVAGLSALVLC +>87626.PTD2_14957 +MKLLYWLDEWLTLSRDEQQARLPMRGDDLLDDVFVKYDFVDLDKPLLFTFSPAGTNVQEQDLHSDFAPWGYKLGKKQNVNIISFQHLGKSNWFRSRNLIFFLEQLSPLLEPFNQRLGYGLSRGGFAVGAFANLLKLDQVLLFHPVSTKNKQIATWDDRSSTDIAQQFDWQGDYHDLDLGHAKGYIIYDPTNHIDRMHAKRYQQLTHLRVFGMGHGTHATYLNKFGFYKQVAVDFIGNQQIEIAQFRQQTKTLRFKEDYYKRLNRANKNSAHRLGLLSKAHNIVIGEKEEHVQEHQAQIDIQPLIDVALKYQDKHPEDAIELLKVAQQLAPDDPLVEHKLKQLE \ No newline at end of file diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/b.fa b/q2_types/reference_db/tests/data/hmmer/bacteria/b.fa new file mode 100644 index 00000000..bc655eca --- /dev/null +++ b/q2_types/reference_db/tests/data/hmmer/bacteria/b.fa @@ -0,0 +1,12 @@ +>1268239.PALB_13220 
+MKLLYWLDDWLTASRDEQQARLPMMGDDLLDDVFVKYHFVDINKPLLFTFSPAGTNVQEHDLHEDFAPWGYRLAQKQGVNIIAFQHLGKSNWFRSRNLIFFLEQLATLLTPFERRLGYGLSRGGFGVGAFANLLGLDEVLLFHPVSTKNKDKVPWDTRSSTDIAQKFDWRGDFHDVDLGHAKGYIIYDPTNPIDRLHAKRYGQLTHLRVFGMGHGTHATYLNKFGFYKQVAVDFIRHQVIDIAQFRQQTKTLRFKEDYYKRLKKANAQSSHRKGLLKKAHQILKDEKQEHVQEHQAQIDIQPLIDIAMKHQEKHPEDALQLLEVAQQLAPDDPLVEHKIKQLGE +>1116375.VEJY3_16241 +MKLLYWLDEWLTLSRKEQETRLPISGEDLLDDVFVKYEFVDLNKPLLFTFSPAGTNLQVQDLHPDFAPWGYRLAQKQKVNIISFQHLGKSNWFRSRNLIFFLEQLSTLLAPFECRLGYGLSRGGFAIGAFANLLKLDQVLLFHPVSTKNQALVPWDNRSSTEIAQQFDWDGDYHDLDLGDARGYIIYDPTNDIDRLHAKRYPELTHLRVYGMGHGTHATYMNKFGFYKQVAADFIRHQQIDIAQFRHQTKTLRLKEDYYHCLNKANASSQHRLNLLSTAHNVLIDEKKEHVKEHQAQIDIQPLVDIALKHEHDNPQDTVQLLEVAQQLVPGDPLVEHKLQQLT +>1307437.J139_15221 +MKLLYWLDEWLTLCRDEQQTKLPMCGGDLLGDVYVKYDFVDLNKPLLFTFSPAGTNVQEHDLTDDFAPWGYHLAQKQNVNVISFQHLGKSNWFRSRNLIFFLEQLSSLLTPFKCRLGYGLSRGGFAVGAFAKLLKLDQVLFFHPVSTKNTETVPWDTRSSTELAQQFDWQSEYNDLDLGHAKGYIIYDPTNKIDRLHAKRYPQLTHLRVFGMGHGTHASYLTKFGFYKQVAVDFIRHQQIDIAQFRLQTKTLRLKEEYYQSLNKANASSPHRLALLSTAHQILADEKEVHVQEHQAKIDIQPLIDVALKHQDEHPNDAIQLLEVAQQIVPDDPLVEHKLKQLE +>1328313.DS2_04565 +MKLLYWLDDWLSQTPEQQQTSLPFAGSDLLGDVFVKYHFIDTNKPLLFTFSPAGTNLQEQDLHEDFNPWGYKLARSQQVNIISFQHLGRSNWFRSRNLIFFIEQLAELLGPFKCRLGYGLSRGGFGVGAFANLLKLDQVLLFHPVSTKNKAKVPWDQRSSTDIAQKFDWLGDYHDVDLGHAKGYIIYDPTNPTDRQHAKRYPQLNHLRVYGMGHGTHATYLTKFGFYKQVAVDFIANQQIDVAAFRQQTRTLRFKEDYYKKLNKANAQSAHRLSLLSKAHQILLEEKAQHIQDHQAQIDVQPLVDIALKHEQDNPQDAIQLLEVAQQLSPEDPLIDHKLKQLK +>1333507.AUTQ01000270_gene2952 +MKLLYWLDEWLTLPRNEQQTRLPMTGSDLLGDVFVKYDFVDVNKPLLFTFSPAGTNVQEQDLHPDFAPWGYHLAQKQNVNVIAFQHLGKSNWFRNRNLIFFLEQLSTLLTPFNCRLGYGLSRGGFAVGAFAKLLKLNQVLFFHPVSTKNKELVSWDDRSSTDIAQQFDWQQDYHDLDLGDAQGYIIYDPTNRIDRMHAKRYKQLTHLRVFGMGHGTHATYLNKFGFYKQVAVDFIQHQQIDIAQFRLQTKTLRFKEDYYKRLNKANTNSVHRKELLSKAHNILIDEKKVHIQEHQKKIDIQPLIDVALKHQDEHPADAIQLLEVAQKLVPGDPLIEHKLKQLE +>1336233.JAEH01000031_gene235 
+MLTLHEDVRIDSCDMSYQYHIVDLSKPVVLCFAPGNSGTDRMDMQQNLWGFDYLKSRKMNVLSITHNGQQNFYQSQACMDIFNALGECLAVFPERIAYGSSRGCFAIGLHAKRLGLDRALMMMPISSMNAELAPQEPKVKQYGAHPNWQGPHNDAAICDIPLTVICDSLYPADHHHYRRFSNVVQFLRLPGVGHRVPSVLNKMGMLSKVVIDYLHNEIDTQAFYKEARKRRQLNVYYRQLLRDPTGKLTTKRKFILRKHQTHVAVSNLSQQLSAKGSAKASAAKQWLIAKKPNLSLIK \ No newline at end of file diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/b2.fa b/q2_types/reference_db/tests/data/hmmer/bacteria/b2.fa new file mode 100644 index 00000000..c354a353 --- /dev/null +++ b/q2_types/reference_db/tests/data/hmmer/bacteria/b2.fa @@ -0,0 +1,10 @@ +>722419.PH505_aa01730 +MKLLYWLDEWLTLPRNEQQTRLPMTGSDLLGDVFVKYDFVDVNKPLLFTFSPAGTNVQEQDLHPDFAPWGYHLAQKQNVNVIAFQHLGKSNWFRNRNLIFFLEQLSTLLTPFNCRLGYGLSRGGFAVGAFAKLLKLNQVLFFHPVSTKNKELVPWDDRSSTDIAQQFDWQQDYHDLDLGDAQGYIIYDPTNRIDRMHAKRYKQLTHLRVFGMGHGTHATYLNKFGFYKQVAVDFIQHQQIDIAQFRMQTKTLRFKEDYYKSLNKANVNSAHRQTLLSKAHNILIDEKEVHVQEHQAKIDIQPLIDVALKHQDEHPNDAIQLLEVAQQLVPDDPLIEHKIKQLEE +>425104.Ssed_4030 +MLKFNQDVRQRIGNVIMLYRLVDVNAPVVITFPPADHGVAESEAWSCTPWGFDFLTSQKINTISFADIGEHFYYHSAEFVNFIELLAQELVIFPQRLGYGVSKGGFGVSLHADRLGLDRALLMMPLSTFNDKKAPWDSAAIRASKAVDCSSPLNDSCRCQTPLTIIFDPLNPRDRRQAVRFRSTSVSLKLPGVGHRIPRALQELGLLKKLVLDFIHNRLDTDAFPGQVRKRRTLSVYYRNLLSNPTQKLTFKRKIVLYYHKLNLQLANIEDEPARILCRIKQSLRKRKYLVEKCHIQLQHVIAERQLALCTAMVFCL +>316275.VSAL_II0711 +MKLLYWLDEWLTHSRSEQQAQLPMSGGDLLDDVFVKYEFVDLDKPLLFTFSPAGTNVQEQDINADFAPWGYHLAQKQQVNIISFQHLGKSNWFRNRNLIFFLEQLTTLLDPFTYRLGYGLSRGGFAVGAFANLLELDQVLFFHPVSTKNQEIAPWDDRSSTELAQKFDWLGDYHDLNLGKAKGYIIYDPTNRIDRLHAKRYPELTHLRVFGMGHGTHSTYLNKFGFYKQVAVDFIRHQKIDIAQFRQQTKTLRFKEDYYQRLNKANSSSEHRLGLLSKAHNILIDEKEAHVQEHQAQLDVQPLIDIALKHQDEHPQDAIQLLEMALKLVPDDPLVERKLKQLI +>312309.VF_1348 
+MKLLYWLDEWLTNSRSQQQARLPMTGSDLLDDVFVKYEFVDLDKPLLFTFSPAGTNLKEQDLHEDYAPWGYHLARKQDVNVISFQHLGQSNWFRSRNLIFFLEQLSTLLEPFTYRLGYGLSRGGFAIGAFANLLQLDQVLLFHPVSTKNQNIAPWDDRSSTEIAQKFDWEGDYHDLDLGKAKGYIIYDPTNRIDRLHAKRYPELTHLRVFGMGHGTHATYLNKFGFYKQVAVDFMRHQKVDIAQFRQQTKTLRFKEDYYKRLNKANSSSEHRLGLLSKAHNIVIGEKEAHVQEHQAQIDVQPLIDIALKHKDEHPKDAIQLLEMAQLLVPDDPLVEHKLKQLA +>1454202.PPBDW_90566___1 +MKLLYWLDEWLTHSRSQQQAQLPMSGGDLLDDVFVKYEFVDVNKPLLFTFSPAGTNVQEQDLDDDFAPWGYHLARKQQVNVISFQHLGKSNWFRSRNLIFFLEQLTTLLEPFNYRLGYGLSRGGFAVGAFANLLQLDQVLLFHPVSTKNKSIAPWDDRSSTALAQQFDWEGDYHDLNLGKAKGYIIYDPTNNIDRLHAKRYPELTHLRVFGMGHGTHATYLNKFGFYKQVAVDFIRYQKIDIAQFRQQTKTLRFKEDYYQSLNKANAYSEHRLGLLSTAHNILIDEKEAHVQEHQAQIDVQPLIDIAIKHQDDYPQDAIQLLEMAQQLVPNDPLVEHKLRQLL \ No newline at end of file diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3f b/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3f new file mode 100644 index 00000000..e69de29b diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3i b/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3i new file mode 100644 index 00000000..e69de29b diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3m b/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3m new file mode 100644 index 00000000..e69de29b diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3p b/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3p new file mode 100644 index 00000000..e69de29b diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.idmap b/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.idmap new file mode 100644 index 00000000..e69de29b diff --git a/q2_types/reference_db/tests/test_format.py b/q2_types/reference_db/tests/test_format.py index 105b27da..e9489f31 100644 --- a/q2_types/reference_db/tests/test_format.py +++ b/q2_types/reference_db/tests/test_format.py @@ -5,13 +5,16 @@ # # The full license is in the file LICENSE, 
distributed with this software. # ---------------------------------------------------------------------------- +import tempfile +import shutil +import os from qiime2.plugin.testing import TestPluginBase from q2_types.reference_db._format import ( - DiamondDatabaseFileFmt, DiamondDatabaseDirFmt, EggnogRefBinFileFmt, - EggnogRefDirFmt, NCBITaxonomyNamesFormat, NCBITaxonomyNodesFormat, - NCBITaxonomyDirFmt, NCBITaxonomyBinaryFileFmt, - EggnogProteinSequencesDirFmt, EggnogRefTextFileFmt - ) + DiamondDatabaseFileFmt, DiamondDatabaseDirFmt, EggnogRefBinFileFmt, + EggnogRefDirFmt, NCBITaxonomyNamesFormat, NCBITaxonomyNodesFormat, + NCBITaxonomyDirFmt, NCBITaxonomyBinaryFileFmt, + EggnogProteinSequencesDirFmt, EggnogRefTextFileFmt, HmmerDirFmt +) from qiime2.plugin import ValidationError @@ -151,6 +154,35 @@ def test_EggnogRefTextFileFmt_invalid_taxid_lineage(self): ): fmt_obj.validate() + def test_HmmerDirFmt(self): + fmt = HmmerDirFmt(self.get_data_path("hmmer"), 'r') + fmt.validate() + + def test_HmmerDirFmt_missing_hmm(self): + with tempfile.TemporaryDirectory() as tmp: + shutil.copytree( + self.get_data_path("hmmer"), tmp, dirs_exist_ok=True + ) + os.remove(f"{tmp}/bacteria/bacteria.hmm.h3f") + fmt = HmmerDirFmt(tmp, 'r') + with self.assertRaisesRegex( + ValidationError, "Missing one or more files" + ): + fmt.validate(level="min") + + def test_HmmerDirFmt_missing_fa(self): + with tempfile.TemporaryDirectory() as tmp: + shutil.copytree( + self.get_data_path("hmmer"), tmp, dirs_exist_ok=True + ) + for file in ["a", "b", "b2"]: + os.remove(f"{tmp}/bacteria/{file}.fa") + fmt = HmmerDirFmt(tmp, 'r') + with self.assertRaisesRegex( + ValidationError, "Missing one or more files" + ): + fmt.validate(level="min") + class TestNCBIFormats(TestPluginBase): package = "q2_types.reference_db.tests" diff --git a/q2_types/reference_db/tests/test_type.py b/q2_types/reference_db/tests/test_type.py index 01d3a44e..eb25f0f1 100644 --- a/q2_types/reference_db/tests/test_type.py +++ 
b/q2_types/reference_db/tests/test_type.py @@ -10,10 +10,11 @@ from q2_types.reference_db._format import ( DiamondDatabaseDirFmt, EggnogRefDirFmt, NCBITaxonomyDirFmt, - EggnogProteinSequencesDirFmt + EggnogProteinSequencesDirFmt, HmmerDirFmt ) from q2_types.reference_db._type import ( - ReferenceDB, Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences + ReferenceDB, Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences, + HMMER ) @@ -54,3 +55,11 @@ def test_EggnogProteinSequences_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( ReferenceDB[EggnogProteinSequences], EggnogProteinSequencesDirFmt) + + def test_hmmer_registration(self): + self.assertRegisteredSemanticType(HMMER) + + def test_HMMER_semantic_type_registered_to_DirFmt(self): + self.assertSemanticTypeRegisteredToFormat( + ReferenceDB[HMMER], HmmerDirFmt + ) From 0d729a31d36ceb65c12c1c258623941a67084022 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Fri, 17 May 2024 11:29:10 +0200 Subject: [PATCH 02/28] Add format and type to reference_db.__init__.py --- q2_types/reference_db/__init__.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/q2_types/reference_db/__init__.py b/q2_types/reference_db/__init__.py index be66ab92..d8a8b796 100644 --- a/q2_types/reference_db/__init__.py +++ b/q2_types/reference_db/__init__.py @@ -10,7 +10,7 @@ from q2_types.reference_db._type import ( ReferenceDB, Diamond, Eggnog, NCBITaxonomy, - EggnogProteinSequences + EggnogProteinSequences, HMMER ) from q2_types.reference_db._format import ( @@ -20,13 +20,17 @@ DiamondDatabaseFileFmt, DiamondDatabaseDirFmt, NCBITaxonomyDirFmt, - EggnogProteinSequencesDirFmt + EggnogProteinSequencesDirFmt, + HmmerDirFmt ) -__all__ = ['ReferenceDB', 'Diamond', 'Eggnog', 'DiamondDatabaseFileFmt', - 'DiamondDatabaseDirFmt', 'EggnogRefDirFmt', 'EggnogRefTextFileFmt', - 'EggnogRefBinFileFmt', 'NCBITaxonomyDirFmt', 'NCBITaxonomy', - 'EggnogProteinSequencesDirFmt', 
'EggnogProteinSequences'] +__all__ = [ + 'ReferenceDB', 'Diamond', 'Eggnog', 'DiamondDatabaseFileFmt', + 'DiamondDatabaseDirFmt', 'EggnogRefDirFmt', 'EggnogRefTextFileFmt', + 'EggnogRefBinFileFmt', 'NCBITaxonomyDirFmt', 'NCBITaxonomy', + 'EggnogProteinSequencesDirFmt', 'EggnogProteinSequences', 'HMMER', + 'HmmerDirFmt' +] importlib.import_module('q2_types.reference_db._format') importlib.import_module('q2_types.reference_db._type') From 571d35f50b770c4367d2c70326f47bb907c5bd87 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Thu, 23 May 2024 11:33:59 +0200 Subject: [PATCH 03/28] idmap file fmt + tests --- q2_types/reference_db/_format.py | 32 +++++++++- .../data/hmmer/bacteria/bacteria.hmm.idmap | 19 ++++++ .../data/hmmer/invalid_idmaps/1.hmm.idmap | 19 ++++++ .../data/hmmer/invalid_idmaps/2.hmm.idmap | 19 ++++++ .../data/hmmer/invalid_idmaps/3.hmm.idmap | 19 ++++++ .../data/hmmer/invalid_idmaps/4.hmm.idmap | 19 ++++++ q2_types/reference_db/tests/test_format.py | 58 ++++++++++++++++--- 7 files changed, 177 insertions(+), 8 deletions(-) create mode 100644 q2_types/reference_db/tests/data/hmmer/invalid_idmaps/1.hmm.idmap create mode 100644 q2_types/reference_db/tests/data/hmmer/invalid_idmaps/2.hmm.idmap create mode 100644 q2_types/reference_db/tests/data/hmmer/invalid_idmaps/3.hmm.idmap create mode 100644 q2_types/reference_db/tests/data/hmmer/invalid_idmaps/4.hmm.idmap diff --git a/q2_types/reference_db/_format.py b/q2_types/reference_db/_format.py index ecc2e9ee..51fa9ed6 100644 --- a/q2_types/reference_db/_format.py +++ b/q2_types/reference_db/_format.py @@ -303,6 +303,36 @@ def _validate_(self, level): pass +class HmmerIdmapFileFmt(model.TextFileFormat): + def _validate_(self, level): + with open(str(self), 'r') as file: + # Set the number of rows to be parsed + max_lines = {"min": 100, "max": 10000000}[level] + lines = file.readlines() + for i, line in enumerate(lines, 1): + # Check number of lines parsed so far + if i > max_lines: + break + + # 
Validate line + if not re.match(r'^(\d+) ([A-Z0-9]+)$', line): + raise ValidationError( + f"Invalid line {i}.\n" + f"{line} \n" + "Expected index and an alphanumeric code separated " + "by a single space." + ) + + # Check index is equal to line number + idx, code = line.rstrip("\n").split(sep=" ") + if not idx == str(i): + raise ValidationError( + f"Invalid line {i}.\n" + f"{line} \n" + f"Expected index {i} but got {idx} instead.\n" + ) + + class HmmerDirFmt(model.DirectoryFormat): """ The .h3m file contains the profile HMMs @@ -320,7 +350,7 @@ class HmmerDirFmt(model.DirectoryFormat): h3i = model.File(r'.*\.hmm\.h3i', format=HmmerBinaryFileFmt) h3f = model.File(r'.*\.hmm\.h3f', format=HmmerBinaryFileFmt) h3p = model.File(r'.*\.hmm\.h3p', format=HmmerBinaryFileFmt) - idmap = model.File(r'.*\.hmm\.idmap', format=HmmerBinaryFileFmt) + idmap = model.File(r'.*\.hmm\.idmap', format=HmmerIdmapFileFmt) fasta_files = model.FileCollection( r'.*\.(fa|fasta|faa)$', format=ProteinFASTAFormat, diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.idmap b/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.idmap index e69de29b..4e7e0050 100644 --- a/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.idmap +++ b/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.idmap @@ -0,0 +1,19 @@ +1 1FKAT +2 1FIZK +3 1FIY1 +4 1FKA5 +5 1FIYP +6 1FK7D +7 1FIX5 +8 1FKCK +9 1FIXT +10 1FKBX +11 1FIYG +12 1FKAC +13 1FKB9 +14 1FK72 +15 1FK4H +16 1FK7S +17 1FK66 +18 1FK6W +19 1FIXC \ No newline at end of file diff --git a/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/1.hmm.idmap b/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/1.hmm.idmap new file mode 100644 index 00000000..d8f7bc5c --- /dev/null +++ b/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/1.hmm.idmap @@ -0,0 +1,19 @@ +1 1FKAT:"%#@ +2 1FIZK +3 1FIY1 +4 1FKA5 +5 1FIYP +6 1FK7D +7 1FIX5 +8 1FKCK +9 1FIXT +10 1FKBX +11 1FIYG +12 1FKAC +13 1FKB9 +14 1FK72 +15 1FK4H +16 
1FK7S +17 1FK66 +18 1FK6W +19 1FIXC \ No newline at end of file diff --git a/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/2.hmm.idmap b/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/2.hmm.idmap new file mode 100644 index 00000000..ea1cb3ba --- /dev/null +++ b/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/2.hmm.idmap @@ -0,0 +1,19 @@ +1 1FKAT +2 1FIZK +3 1FIY1 +4 1FKA5 +5 1FIYP +6 1FK7D +7 1FIX5 +8 1FKCK +9 1FIXT +10 1FKBX +11 1FIYG +12 1FKAC +13 1FKB9 +14 1FK72 +15 1FK4H +16 1FK7S +17 1FK66 +18 1FK6W +19 1FIXC \ No newline at end of file diff --git a/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/3.hmm.idmap b/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/3.hmm.idmap new file mode 100644 index 00000000..1c28be88 --- /dev/null +++ b/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/3.hmm.idmap @@ -0,0 +1,19 @@ +1 1FKAT +2 1FIZK +3 1FIY1 +4 1FKA5 +5 1FIYP +6 1FK7D +7 1FIX5 +8 1FKCK +9 1FIXT +10 1FKBX +11 1FIYG +12 1FKAC +13 1FKB9 +14 1FK72 +15 1FK4H +16 1FK7S +17 1FK66 +18 1FK6W +20 1FIXC \ No newline at end of file diff --git a/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/4.hmm.idmap b/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/4.hmm.idmap new file mode 100644 index 00000000..260b29b8 --- /dev/null +++ b/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/4.hmm.idmap @@ -0,0 +1,19 @@ +1FKAT +2 1FIZK +3 1FIY1 +4 1FKA5 +5 1FIYP +6 1FK7D +7 1FIX5 +8 1FKCK +9 1FIXT +10 1FKBX +11 1FIYG +12 1FKAC +13 1FKB9 +14 1FK72 +15 1FK4H +16 1FK7S +17 1FK66 +18 1FK6W +19 1FIXC \ No newline at end of file diff --git a/q2_types/reference_db/tests/test_format.py b/q2_types/reference_db/tests/test_format.py index e9489f31..a9ef27d9 100644 --- a/q2_types/reference_db/tests/test_format.py +++ b/q2_types/reference_db/tests/test_format.py @@ -13,7 +13,8 @@ DiamondDatabaseFileFmt, DiamondDatabaseDirFmt, EggnogRefBinFileFmt, EggnogRefDirFmt, NCBITaxonomyNamesFormat, NCBITaxonomyNodesFormat, NCBITaxonomyDirFmt, 
NCBITaxonomyBinaryFileFmt, - EggnogProteinSequencesDirFmt, EggnogRefTextFileFmt, HmmerDirFmt + EggnogProteinSequencesDirFmt, EggnogRefTextFileFmt, HmmerDirFmt, + HmmerIdmapFileFmt ) from qiime2.plugin import ValidationError @@ -154,16 +155,59 @@ def test_EggnogRefTextFileFmt_invalid_taxid_lineage(self): ): fmt_obj.validate() - def test_HmmerDirFmt(self): - fmt = HmmerDirFmt(self.get_data_path("hmmer"), 'r') + def test_HmmerDirFmt_valid(self): + fmt = HmmerDirFmt(self.get_data_path("hmmer/bacteria"), 'r') fmt.validate() + def test_HmmerDirFmt_invalid_idmap_1(self): + fmt = HmmerIdmapFileFmt(self.get_data_path( + "hmmer/invalid_idmaps/1.hmm.idmap"), 'r' + ) + with self.assertRaisesRegex( + ValidationError, + "Expected index and an alphanumeric code separated " + "by a single space." + ): + fmt.validate(level="min") + + def test_HmmerDirFmt_invalid_idmap_2(self): + fmt = HmmerIdmapFileFmt(self.get_data_path( + "hmmer/invalid_idmaps/2.hmm.idmap"), 'r' + ) + with self.assertRaisesRegex( + ValidationError, + "Expected index and an alphanumeric code separated " + "by a single space." + ): + fmt.validate(level="min") + + def test_HmmerDirFmt_invalid_idmap_3(self): + fmt = HmmerIdmapFileFmt(self.get_data_path( + "hmmer/invalid_idmaps/3.hmm.idmap"), 'r' + ) + with self.assertRaisesRegex( + ValidationError, + 'Expected index' + ): + fmt.validate(level="min") + + def test_HmmerDirFmt_invalid_idmap_4(self): + fmt = HmmerIdmapFileFmt(self.get_data_path( + "hmmer/invalid_idmaps/4.hmm.idmap"), 'r' + ) + with self.assertRaisesRegex( + ValidationError, + "Expected index and an alphanumeric code separated " + "by a single space." 
+ ): + fmt.validate(level="min") + def test_HmmerDirFmt_missing_hmm(self): with tempfile.TemporaryDirectory() as tmp: shutil.copytree( - self.get_data_path("hmmer"), tmp, dirs_exist_ok=True + self.get_data_path("hmmer/bacteria"), tmp, dirs_exist_ok=True ) - os.remove(f"{tmp}/bacteria/bacteria.hmm.h3f") + os.remove(f"{tmp}/bacteria.hmm.h3f") fmt = HmmerDirFmt(tmp, 'r') with self.assertRaisesRegex( ValidationError, "Missing one or more files" @@ -173,10 +217,10 @@ def test_HmmerDirFmt_missing_hmm(self): def test_HmmerDirFmt_missing_fa(self): with tempfile.TemporaryDirectory() as tmp: shutil.copytree( - self.get_data_path("hmmer"), tmp, dirs_exist_ok=True + self.get_data_path("hmmer/bacteria"), tmp, dirs_exist_ok=True ) for file in ["a", "b", "b2"]: - os.remove(f"{tmp}/bacteria/{file}.fa") + os.remove(f"{tmp}/{file}.fa") fmt = HmmerDirFmt(tmp, 'r') with self.assertRaisesRegex( ValidationError, "Missing one or more files" From 1fd9c9bf8fedd9a5d6d979bfd86ec520770f765e Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Thu, 30 May 2024 17:33:14 +0200 Subject: [PATCH 04/28] work in progress. 
hmm file parser --- q2_types/reference_db/_format.py | 194 ++++++++++++++++++++++++++++--- q2_types/reference_db/_type.py | 6 +- 2 files changed, 185 insertions(+), 15 deletions(-) diff --git a/q2_types/reference_db/_format.py b/q2_types/reference_db/_format.py index 51fa9ed6..cf67dbb3 100644 --- a/q2_types/reference_db/_format.py +++ b/q2_types/reference_db/_format.py @@ -14,7 +14,7 @@ from q2_types.plugin_setup import plugin from q2_types.reference_db._type import ( ReferenceDB, Eggnog, Diamond, NCBITaxonomy, - EggnogProteinSequences, HMMER + EggnogProteinSequences, HMMER, HMMERpressed ) from q2_types.feature_data import ( MixedCaseProteinFASTAFormat, ProteinFASTAFormat @@ -333,7 +333,19 @@ def _validate_(self, level): ) -class HmmerDirFmt(model.DirectoryFormat): +class HmmerBaseDirFmt(model.DirectoryFormat): + fasta_files = model.FileCollection( + r'.*\.(fa|fasta|faa)$', + format=ProteinFASTAFormat, + optional=False, + ) + + @fasta_files.set_path_maker + def fasta_files_path_maker(self, name): + return str(name) + + +class HmmerPressedDirFmt(HmmerBaseDirFmt): """ The .h3m file contains the profile HMMs and their annotation in a binary format. The .h3i file is an @@ -341,27 +353,181 @@ class HmmerDirFmt(model.DirectoryFormat): precomputed data structures for the fast heuristic filter (the MSV filter). The .h3p file contains precomputed data structures for the rest of each profile. - - - Dont know what the idmap file is for. - - Also dont know why there are fasta files but they are needed for - eggnog-hmmer-search action in q2-moshpit. 
""" h3m = model.File(r'.*\.hmm\.h3m', format=HmmerBinaryFileFmt) h3i = model.File(r'.*\.hmm\.h3i', format=HmmerBinaryFileFmt) h3f = model.File(r'.*\.hmm\.h3f', format=HmmerBinaryFileFmt) h3p = model.File(r'.*\.hmm\.h3p', format=HmmerBinaryFileFmt) - idmap = model.File(r'.*\.hmm\.idmap', format=HmmerIdmapFileFmt) - fasta_files = model.FileCollection( - r'.*\.(fa|fasta|faa)$', - format=ProteinFASTAFormat, - optional=False, + idmap = model.File( + r'.*\.hmm\.idmap', format=HmmerIdmapFileFmt, optional=True ) - @fasta_files.set_path_maker - def fasta_files_path_maker(self, name): + +class HmmFileFmt(model.TextFileFormat): + alphabets = { + "AMINO": "ACDEFGHIKLMNPQRSTVWY", + "DNA": "ACGT", + "RNA": "ACGU" + } + is_valid_value = { + "HMMER2.0": lambda x: re.match(r"^.+$", x), + "HMMER3/a": lambda x: re.match(r"^.+$", x), + "HMMER3/b": lambda x: re.match(r"^.+$", x), + "HMMER3/c": lambda x: re.match(r"^.+$", x), + "HMMER3/d": lambda x: re.match(r"^.+$", x), + "HMMER3/e": lambda x: re.match(r"^.+$", x), + "HMMER3/f": lambda x: re.match(r"^.+$", x), + "NAME": lambda x: re.match(r"^\S+$", x), + "ACC": lambda x: re.match(r"^\w+$", x), + "DESC": lambda x: re.match(r"^.+$", x), + "LENG": lambda x: re.match(r"^\d+$", x), + "MAXL": lambda x: re.match(r"^\d+$", x), + "ALPH": lambda x: re.match(r"^(amino|DNA|RNA)$", x, re.IGNORECASE), + "RF": lambda x: re.match(r"^(yes|no)$", x, re.IGNORECASE), + "MM": lambda x: re.match(r"^(yes|no)$", x, re.IGNORECASE), + "CONS": lambda x: re.match(r"^(yes|no)$", x, re.IGNORECASE), + "CS": lambda x: re.match(r"^(yes|no)$", x, re.IGNORECASE), + "MAP": lambda x: re.match(r"^(yes|no)$", x, re.IGNORECASE), + "DATE": lambda x: re.match(r"^.+$", x), + "COM": lambda x: re.match(r"^\d+ \w+$", x), + "NSEQ": lambda x: re.match(r"^\d+$", x), + "EFFN": lambda x: re.match(r"^\d+\.?\d+$", x), + "CKSUM": lambda x: re.match(r"^\d+$", x), + "GA": lambda x: re.match(r"^(\d+\.?\d+) (\d+\.?\d+)$", x), + "TC": lambda x: re.match(r"^(\d+\.?\d+) (\d+\.?\d+)$", x), + 
"NC": lambda x: re.match(r"^(\d+\.?\d+) (\d+\.?\d+)$", x), + "STATS": lambda x: re.match( + r"^LOCAL (MSV|VITERBI|FORWARD) (\d+\.?\d+) (\d+\.?\d+)$", x + ), + "HMM": lambda x: re.match(r"^.+$", x), + "COMPO": lambda x: re.match(r"^(\d+\.?\d+ ?)+$", x), + } + + def _parse_header(self, lines): + tag_values = {} + for line in lines: + tag, value = (re.split(r"\s+", line, 1)) + tag_values[tag] = value + + # check that all mandatory tags are present + mandatory_tags = {"NAME", "LENG", "ALPH", "HMM"} + HMMER_tags = {[ + f"HMMER{i}" + for i in ["3/a", "3/b", "3/c", "3/d", "3/e", "3/f", "2.0"] + ]} + tags_in_header = tag_values.keys() + if not ( + mandatory_tags.issubset(tags_in_header) and + len(HMMER_tags.intersection(tags_in_header)) == 1 + ): + raise ValidationError( + "Missing tag(s) in header: \n" + f"{mandatory_tags.difference(tags_in_header)} \n" + "Printing lines: \n" + f"{lines}" + ) + + for tag, value in tag_values.items(): + if not self.is_valid_value[tag](value): + raise ValidationError( + f"Invalid value '{value}' for tag '{tag}'\n" + "Printing lines: \n" + f"{lines}" + ) + + # Validate alphabet + expected_alph = self.alphabets[tag_values["ALPH"].upper()] + observed_alph = "".join(re.split(r"\s+", tag_values["HMM"])) + if observed_alph != expected_alph: + raise ValidationError( + f"Invalid alphabet." + f"Expected: {self.alph}\n" + f"Observed: {observed_alph}\n" + ) + + # Save alphabet length + self.alph_len = len(observed_alph) + + def _parse_body(self): + """ + Parse the HMMER profile section of the file + """ + + def _validate_(self, level): + """ + Check http://eddylab.org/software/hmmer/Userguide.pdf + section "HMMER profile HMM files" for full description of + hmm file format. 
+ """ + + with open(str(self), 'r') as file: + # Check if hmm file has more than one profile + profiles_found = 0 + parse_n_profiles = 1 + for line in file: + if line.startswith("//"): + profiles_found += 1 + if profiles_found > 1: + # If more than one profile is found use level to set + # the number of profiles to parse + parse_n_profiles = {"min": 3, "max": 300000}[level] + break + + # Reset cursor to beginning of file + file.seek(0) + + # Parse + profiles_parsed = 0 + while profiles_parsed < parse_n_profiles: + # Validate header + header = [] + for line in file: + header.append(line) + if line.startswith("HMM"): + break + self._parse_header(header) + + # Consume column headers for the state transition probability + # fields + observed_headers = set(re.split(r"\s+", file.readline())) + expected_headers = { + "m->m", "m->i", "m->d", "i->m", "i->i", "d->m", "d->d" + } + if observed_headers != expected_headers: + raise ValidationError( + f"Invalid headers." + f"Expected: {expected_headers}\n" + f"Observed: {observed_headers}\n" + ) + + # Validate HMMER model + body = [] + for line in file: + if line.startswith("//"): + break + else: + body.append(line) + self._parse_body(body) + + # Increase count of parsed profiles + profiles_parsed += 1 + + +class HmmerDirFmt(HmmerBaseDirFmt): + """ + One or more HMMER profile files. 
+ """ + hmm_files = model.FileCollection( + r'.*\.(hmm)$', format=HmmFileFmt + ) + + @hmm_files.set_path_maker + def hmm_files_path_maker(self, name): return str(name) -plugin.register_formats(HmmerDirFmt) +plugin.register_formats(HmmerDirFmt, HmmerPressedDirFmt) plugin.register_semantic_type_to_format(ReferenceDB[HMMER], HmmerDirFmt) +plugin.register_semantic_type_to_format(ReferenceDB[HMMERpressed], + HmmerPressedDirFmt) diff --git a/q2_types/reference_db/_type.py b/q2_types/reference_db/_type.py index 0362bf38..67ce3add 100644 --- a/q2_types/reference_db/_type.py +++ b/q2_types/reference_db/_type.py @@ -22,6 +22,10 @@ HMMER = SemanticType( 'HMMER', variant_of=ReferenceDB.field['type'] ) +HMMERpressed = SemanticType( + 'HMMERpressed', variant_of=ReferenceDB.field['type'] +) plugin.register_semantic_types( - ReferenceDB, Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences, HMMER + ReferenceDB, Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences, HMMER, + HMMERpressed ) From f1ee960b023ad0193744c26de32f7b76ec082f28 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Fri, 31 May 2024 11:54:02 +0200 Subject: [PATCH 05/28] use the pyhmmer validation for hmm files --- q2_types/reference_db/_format.py | 150 ++----------------------------- 1 file changed, 7 insertions(+), 143 deletions(-) diff --git a/q2_types/reference_db/_format.py b/q2_types/reference_db/_format.py index cf67dbb3..7fae92fe 100644 --- a/q2_types/reference_db/_format.py +++ b/q2_types/reference_db/_format.py @@ -5,10 +5,9 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- - - import gzip import re +from pyhmmer.plan7 import HMMFile from qiime2.plugin import model from qiime2.core.exceptions import ValidationError from q2_types.plugin_setup import plugin @@ -364,153 +363,18 @@ class HmmerPressedDirFmt(HmmerBaseDirFmt): class HmmFileFmt(model.TextFileFormat): - alphabets = { - "AMINO": "ACDEFGHIKLMNPQRSTVWY", - "DNA": "ACGT", - "RNA": "ACGU" - } - is_valid_value = { - "HMMER2.0": lambda x: re.match(r"^.+$", x), - "HMMER3/a": lambda x: re.match(r"^.+$", x), - "HMMER3/b": lambda x: re.match(r"^.+$", x), - "HMMER3/c": lambda x: re.match(r"^.+$", x), - "HMMER3/d": lambda x: re.match(r"^.+$", x), - "HMMER3/e": lambda x: re.match(r"^.+$", x), - "HMMER3/f": lambda x: re.match(r"^.+$", x), - "NAME": lambda x: re.match(r"^\S+$", x), - "ACC": lambda x: re.match(r"^\w+$", x), - "DESC": lambda x: re.match(r"^.+$", x), - "LENG": lambda x: re.match(r"^\d+$", x), - "MAXL": lambda x: re.match(r"^\d+$", x), - "ALPH": lambda x: re.match(r"^(amino|DNA|RNA)$", x, re.IGNORECASE), - "RF": lambda x: re.match(r"^(yes|no)$", x, re.IGNORECASE), - "MM": lambda x: re.match(r"^(yes|no)$", x, re.IGNORECASE), - "CONS": lambda x: re.match(r"^(yes|no)$", x, re.IGNORECASE), - "CS": lambda x: re.match(r"^(yes|no)$", x, re.IGNORECASE), - "MAP": lambda x: re.match(r"^(yes|no)$", x, re.IGNORECASE), - "DATE": lambda x: re.match(r"^.+$", x), - "COM": lambda x: re.match(r"^\d+ \w+$", x), - "NSEQ": lambda x: re.match(r"^\d+$", x), - "EFFN": lambda x: re.match(r"^\d+\.?\d+$", x), - "CKSUM": lambda x: re.match(r"^\d+$", x), - "GA": lambda x: re.match(r"^(\d+\.?\d+) (\d+\.?\d+)$", x), - "TC": lambda x: re.match(r"^(\d+\.?\d+) (\d+\.?\d+)$", x), - "NC": lambda x: re.match(r"^(\d+\.?\d+) (\d+\.?\d+)$", x), - "STATS": lambda x: re.match( - r"^LOCAL (MSV|VITERBI|FORWARD) (\d+\.?\d+) (\d+\.?\d+)$", x - ), - "HMM": lambda x: re.match(r"^.+$", x), - "COMPO": lambda x: re.match(r"^(\d+\.?\d+ ?)+$", x), - } - 
- def _parse_header(self, lines): - tag_values = {} - for line in lines: - tag, value = (re.split(r"\s+", line, 1)) - tag_values[tag] = value - - # check that all mandatory tags are present - mandatory_tags = {"NAME", "LENG", "ALPH", "HMM"} - HMMER_tags = {[ - f"HMMER{i}" - for i in ["3/a", "3/b", "3/c", "3/d", "3/e", "3/f", "2.0"] - ]} - tags_in_header = tag_values.keys() - if not ( - mandatory_tags.issubset(tags_in_header) and - len(HMMER_tags.intersection(tags_in_header)) == 1 - ): - raise ValidationError( - "Missing tag(s) in header: \n" - f"{mandatory_tags.difference(tags_in_header)} \n" - "Printing lines: \n" - f"{lines}" - ) - - for tag, value in tag_values.items(): - if not self.is_valid_value[tag](value): - raise ValidationError( - f"Invalid value '{value}' for tag '{tag}'\n" - "Printing lines: \n" - f"{lines}" - ) - - # Validate alphabet - expected_alph = self.alphabets[tag_values["ALPH"].upper()] - observed_alph = "".join(re.split(r"\s+", tag_values["HMM"])) - if observed_alph != expected_alph: - raise ValidationError( - f"Invalid alphabet." - f"Expected: {self.alph}\n" - f"Observed: {observed_alph}\n" - ) - - # Save alphabet length - self.alph_len = len(observed_alph) - - def _parse_body(self): - """ - Parse the HMMER profile section of the file - """ - def _validate_(self, level): """ Check http://eddylab.org/software/hmmer/Userguide.pdf section "HMMER profile HMM files" for full description of hmm file format. 
""" - - with open(str(self), 'r') as file: - # Check if hmm file has more than one profile - profiles_found = 0 - parse_n_profiles = 1 - for line in file: - if line.startswith("//"): - profiles_found += 1 - if profiles_found > 1: - # If more than one profile is found use level to set - # the number of profiles to parse - parse_n_profiles = {"min": 3, "max": 300000}[level] - break - - # Reset cursor to beginning of file - file.seek(0) - - # Parse - profiles_parsed = 0 - while profiles_parsed < parse_n_profiles: - # Validate header - header = [] - for line in file: - header.append(line) - if line.startswith("HMM"): - break - self._parse_header(header) - - # Consume column headers for the state transition probability - # fields - observed_headers = set(re.split(r"\s+", file.readline())) - expected_headers = { - "m->m", "m->i", "m->d", "i->m", "i->i", "d->m", "d->d" - } - if observed_headers != expected_headers: - raise ValidationError( - f"Invalid headers." - f"Expected: {expected_headers}\n" - f"Observed: {observed_headers}\n" - ) - - # Validate HMMER model - body = [] - for line in file: - if line.startswith("//"): - break - else: - body.append(line) - self._parse_body(body) - - # Increase count of parsed profiles - profiles_parsed += 1 + parse_n_profiles = {"min": 3, "max": None}[level] + tolerance = 0.0001 + with HMMFile(str(self)) as hmm_file: + hmm_profiles = list(hmm_file) + for hmm_profile in hmm_profiles[:parse_n_profiles]: + hmm_profile.validate(tolerance=tolerance) class HmmerDirFmt(HmmerBaseDirFmt): From cb20829a0e4b609f815f031d3b846eb01b3bf503 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Fri, 31 May 2024 15:54:45 +0200 Subject: [PATCH 06/28] made st and formats or each alphabet type --- q2_types/reference_db/_format.py | 168 +++++++++++++++++++++++-------- q2_types/reference_db/_type.py | 24 +++-- 2 files changed, 144 insertions(+), 48 deletions(-) diff --git a/q2_types/reference_db/_format.py b/q2_types/reference_db/_format.py index 
7fae92fe..268a69fd 100644 --- a/q2_types/reference_db/_format.py +++ b/q2_types/reference_db/_format.py @@ -12,12 +12,10 @@ from qiime2.core.exceptions import ValidationError from q2_types.plugin_setup import plugin from q2_types.reference_db._type import ( - ReferenceDB, Eggnog, Diamond, NCBITaxonomy, - EggnogProteinSequences, HMMER, HMMERpressed -) -from q2_types.feature_data import ( - MixedCaseProteinFASTAFormat, ProteinFASTAFormat + ReferenceDB, Eggnog, Diamond, NCBITaxonomy, EggnogProteinSequences, + aminoHMM, dnaHMM, rnaHMM, aminoHMMpressed, rnaHMMpressed, dnaHMMpressed ) +from q2_types.feature_data import MixedCaseProteinFASTAFormat class EggnogRefTextFileFmt(model.TextFileFormat): @@ -297,12 +295,12 @@ class EggnogProteinSequencesDirFmt(model.DirectoryFormat): EggnogProteinSequencesDirFmt) -class HmmerBinaryFileFmt(model.BinaryFileFormat): +class HmmBinaryFileFmt(model.BinaryFileFormat): def _validate_(self, level): pass -class HmmerIdmapFileFmt(model.TextFileFormat): +class HmmIdmapFileFmt(model.TextFileFormat): def _validate_(self, level): with open(str(self), 'r') as file: # Set the number of rows to be parsed @@ -332,19 +330,7 @@ def _validate_(self, level): ) -class HmmerBaseDirFmt(model.DirectoryFormat): - fasta_files = model.FileCollection( - r'.*\.(fa|fasta|faa)$', - format=ProteinFASTAFormat, - optional=False, - ) - - @fasta_files.set_path_maker - def fasta_files_path_maker(self, name): - return str(name) - - -class HmmerPressedDirFmt(HmmerBaseDirFmt): +class BaseHmmPressedDirFmt(model.directory_format): """ The .h3m file contains the profile HMMs and their annotation in a binary format. The .h3i file is an @@ -353,17 +339,44 @@ class HmmerPressedDirFmt(HmmerBaseDirFmt): (the MSV filter). The .h3p file contains precomputed data structures for the rest of each profile. 
""" - h3m = model.File(r'.*\.hmm\.h3m', format=HmmerBinaryFileFmt) - h3i = model.File(r'.*\.hmm\.h3i', format=HmmerBinaryFileFmt) - h3f = model.File(r'.*\.hmm\.h3f', format=HmmerBinaryFileFmt) - h3p = model.File(r'.*\.hmm\.h3p', format=HmmerBinaryFileFmt) + h3m = model.File(r'.*\.hmm\.h3m', format=HmmBinaryFileFmt) + h3i = model.File(r'.*\.hmm\.h3i', format=HmmBinaryFileFmt) + h3f = model.File(r'.*\.hmm\.h3f', format=HmmBinaryFileFmt) + h3p = model.File(r'.*\.hmm\.h3p', format=HmmBinaryFileFmt) idmap = model.File( - r'.*\.hmm\.idmap', format=HmmerIdmapFileFmt, optional=True + r'.*\.hmm\.idmap', format=HmmIdmapFileFmt, optional=True ) -class HmmFileFmt(model.TextFileFormat): - def _validate_(self, level): +class AminoHmmPressedDirFmt(BaseHmmPressedDirFmt): + alphabet = "amino" + + +class DnaHmmPressedDirFmt(BaseHmmPressedDirFmt): + alphabet = "dna" + + +class RnaHmmPressedDirFmt(BaseHmmPressedDirFmt): + alphabet = "rna" + + +plugin.register_semantic_type_to_format( + ReferenceDB[aminoHMMpressed], AminoHmmPressedDirFmt +) + +plugin.register_semantic_type_to_format( + ReferenceDB[dnaHMMpressed], AminoHmmPressedDirFmt +) + +plugin.register_semantic_type_to_format( + ReferenceDB[rnaHMMpressed], AminoHmmPressedDirFmt +) + + +class HmmBaseFileFmt(model.TextFileFormat): + def _validate_file_fmt( + self, level: str, alphabet: str, single_profile: bool + ): """ Check http://eddylab.org/software/hmmer/Userguide.pdf section "HMMER profile HMM files" for full description of @@ -371,27 +384,98 @@ def _validate_(self, level): """ parse_n_profiles = {"min": 3, "max": None}[level] tolerance = 0.0001 + with HMMFile(str(self)) as hmm_file: hmm_profiles = list(hmm_file) + + if len(hmm_profiles) > 1 and single_profile: + raise ValidationError( + f"Expected 1 profile, found {len(hmm_profiles)}." 
+ ) + for hmm_profile in hmm_profiles[:parse_n_profiles]: hmm_profile.validate(tolerance=tolerance) + if hmm_profile.alphabet.lower() != alphabet: + raise ValidationError( + "Found profile with alphabet: " + f"{hmm_profile.alph.lower()}\n" + f"{self.__class__} only accepts {alphabet} profiles." + ) + -class HmmerDirFmt(HmmerBaseDirFmt): - """ - One or more HMMER profile files. - """ - hmm_files = model.FileCollection( - r'.*\.(hmm)$', format=HmmFileFmt - ) +class AminoHmmFileFmt(HmmBaseFileFmt): + alphabet = "amino" - @hmm_files.set_path_maker - def hmm_files_path_maker(self, name): - return str(name) + def _validate_(self, level): + self._validate_file_fmt(self, level, self.alphabet, True) + + +class DnaHmmFileFmt(HmmBaseFileFmt): + alphabet = "dna" + def _validate_(self, level): + self._validate_file_fmt(self, level, self.alphabet, True) + + +class RnaHmmFileFmt(HmmBaseFileFmt): + alphabet = "rna" + + def _validate_(self, level): + self._validate_file_fmt(self, level, self.alphabet, True) + + +DifferentialDirectoryFormat = model.SingleFileDirectoryFormat( + 'AminoHmmFileFmt', 'profile.hmm', AminoHmmFileFmt) + +DifferentialDirectoryFormat = model.SingleFileDirectoryFormat( + 'DnaHmmFileFmt', 'profile.hmm', DnaHmmFileFmt) + +DifferentialDirectoryFormat = model.SingleFileDirectoryFormat( + 'RnaHmmFileFmt', 'profile.hmm', RnaHmmFileFmt) + +plugin.register_formats(AminoHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt) + + +class HmmAminoDBFileFmt(AminoHmmFileFmt): + def _validate_(self, level): + self._validate_file_fmt(self, level, self.alphabet, False) + + +class HmmDnaDBFileFmt(DnaHmmFileFmt): + def _validate_(self, level): + self._validate_file_fmt(self, level, self.alphabet, False) + + +class HmmRnaDBFileFmt(RnaHmmFileFmt): + def _validate_(self, level): + self._validate_file_fmt(self, level, self.alphabet, False) -plugin.register_formats(HmmerDirFmt, HmmerPressedDirFmt) -plugin.register_semantic_type_to_format(ReferenceDB[HMMER], - HmmerDirFmt) 
-plugin.register_semantic_type_to_format(ReferenceDB[HMMERpressed], - HmmerPressedDirFmt) + +DifferentialDirectoryFormat = model.SingleFileDirectoryFormat( + 'HmmAminoDBFileFmt', 'profile.hmm', HmmAminoDBFileFmt +) + +DifferentialDirectoryFormat = model.SingleFileDirectoryFormat( + 'HmmDnaDBFileFmt', 'profile.hmm', HmmDnaDBFileFmt +) + +DifferentialDirectoryFormat = model.SingleFileDirectoryFormat( + 'HmmRnaDBFileFmt', 'profile.hmm', HmmRnaDBFileFmt +) + +plugin.register_formats( + HmmAminoDBFileFmt, HmmDnaDBFileFmt, HmmRnaDBFileFmt +) + +plugin.register_semantic_type_to_format( + ReferenceDB[aminoHMM], HmmAminoDBFileFmt +) + +plugin.register_semantic_type_to_format( + ReferenceDB[dnaHMM], HmmAminoDBFileFmt +) + +plugin.register_semantic_type_to_format( + ReferenceDB[rnaHMM], HmmAminoDBFileFmt +) diff --git a/q2_types/reference_db/_type.py b/q2_types/reference_db/_type.py index 67ce3add..d694d95e 100644 --- a/q2_types/reference_db/_type.py +++ b/q2_types/reference_db/_type.py @@ -19,13 +19,25 @@ EggnogProteinSequences = SemanticType( 'EggnogProteinSequences', variant_of=ReferenceDB.field['type'] ) -HMMER = SemanticType( - 'HMMER', variant_of=ReferenceDB.field['type'] +aminoHMM = SemanticType( + 'aminoHMM', variant_of=ReferenceDB.field['type'] ) -HMMERpressed = SemanticType( - 'HMMERpressed', variant_of=ReferenceDB.field['type'] +dnaHMM = SemanticType( + 'dnaHMM', variant_of=ReferenceDB.field['type'] +) +rnaHMM = SemanticType( + 'rnaHMM', variant_of=ReferenceDB.field['type'] +) +aminoHMMpressed = SemanticType( + 'aminoHMMpressed', variant_of=ReferenceDB.field['type'] +) +dnaHMMpressed = SemanticType( + 'dnaHMMpressed', variant_of=ReferenceDB.field['type'] +) +rnaHMMpressed = SemanticType( + 'rnaHMMpressed', variant_of=ReferenceDB.field['type'] ) plugin.register_semantic_types( - ReferenceDB, Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences, HMMER, - HMMERpressed + ReferenceDB, Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences, + aminoHMM, dnaHMM, rnaHMM, 
aminoHMMpressed, rnaHMMpressed, dnaHMMpressed ) From d004015fe67095bd1bafc991a695b061ce50d5e1 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Fri, 31 May 2024 16:18:27 +0200 Subject: [PATCH 07/28] renamed formats and st --- q2_types/reference_db/_format.py | 56 ++++++++++++++------------------ 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/q2_types/reference_db/_format.py b/q2_types/reference_db/_format.py index 268a69fd..7f80e02f 100644 --- a/q2_types/reference_db/_format.py +++ b/q2_types/reference_db/_format.py @@ -348,28 +348,16 @@ class BaseHmmPressedDirFmt(model.directory_format): ) -class AminoHmmPressedDirFmt(BaseHmmPressedDirFmt): - alphabet = "amino" - - -class DnaHmmPressedDirFmt(BaseHmmPressedDirFmt): - alphabet = "dna" - - -class RnaHmmPressedDirFmt(BaseHmmPressedDirFmt): - alphabet = "rna" - - plugin.register_semantic_type_to_format( - ReferenceDB[aminoHMMpressed], AminoHmmPressedDirFmt + ReferenceDB[aminoHMMpressed], BaseHmmPressedDirFmt ) plugin.register_semantic_type_to_format( - ReferenceDB[dnaHMMpressed], AminoHmmPressedDirFmt + ReferenceDB[dnaHMMpressed], BaseHmmPressedDirFmt ) plugin.register_semantic_type_to_format( - ReferenceDB[rnaHMMpressed], AminoHmmPressedDirFmt + ReferenceDB[rnaHMMpressed], BaseHmmPressedDirFmt ) @@ -425,57 +413,61 @@ def _validate_(self, level): self._validate_file_fmt(self, level, self.alphabet, True) -DifferentialDirectoryFormat = model.SingleFileDirectoryFormat( +AminoHmmDirectoryFormat = model.SingleFileDirectoryFormat( 'AminoHmmFileFmt', 'profile.hmm', AminoHmmFileFmt) -DifferentialDirectoryFormat = model.SingleFileDirectoryFormat( +DnaHmmDirectoryFormat = model.SingleFileDirectoryFormat( 'DnaHmmFileFmt', 'profile.hmm', DnaHmmFileFmt) -DifferentialDirectoryFormat = model.SingleFileDirectoryFormat( +RnaHmmDirectoryFormat = model.SingleFileDirectoryFormat( 'RnaHmmFileFmt', 'profile.hmm', RnaHmmFileFmt) -plugin.register_formats(AminoHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt) 
+plugin.register_formats( + AminoHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt, + AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat +) -class HmmAminoDBFileFmt(AminoHmmFileFmt): +class AminoHmmDBFileFmt(AminoHmmFileFmt): def _validate_(self, level): self._validate_file_fmt(self, level, self.alphabet, False) -class HmmDnaDBFileFmt(DnaHmmFileFmt): +class DnaHmmDBFileFmt(DnaHmmFileFmt): def _validate_(self, level): self._validate_file_fmt(self, level, self.alphabet, False) -class HmmRnaDBFileFmt(RnaHmmFileFmt): +class RnaHmmDBFileFmt(RnaHmmFileFmt): def _validate_(self, level): self._validate_file_fmt(self, level, self.alphabet, False) -DifferentialDirectoryFormat = model.SingleFileDirectoryFormat( - 'HmmAminoDBFileFmt', 'profile.hmm', HmmAminoDBFileFmt +AminoHmmDbDirectoryFormat = model.SingleFileDirectoryFormat( + 'HmmAminoDBFileFmt', 'profile.hmm', AminoHmmDBFileFmt ) -DifferentialDirectoryFormat = model.SingleFileDirectoryFormat( - 'HmmDnaDBFileFmt', 'profile.hmm', HmmDnaDBFileFmt +DnaHmmDbDirectoryFormat = model.SingleFileDirectoryFormat( + 'HmmDnaDBFileFmt', 'profile.hmm', DnaHmmDBFileFmt ) -DifferentialDirectoryFormat = model.SingleFileDirectoryFormat( - 'HmmRnaDBFileFmt', 'profile.hmm', HmmRnaDBFileFmt +RnaHmmDbDirectoryFormat = model.SingleFileDirectoryFormat( + 'HmmRnaDBFileFmt', 'profile.hmm', RnaHmmDBFileFmt ) plugin.register_formats( - HmmAminoDBFileFmt, HmmDnaDBFileFmt, HmmRnaDBFileFmt + AminoHmmDbDirectoryFormat, DnaHmmDbDirectoryFormat, AminoHmmDBFileFmt, + RnaHmmDbDirectoryFormat, DnaHmmDBFileFmt, RnaHmmDBFileFmt ) plugin.register_semantic_type_to_format( - ReferenceDB[aminoHMM], HmmAminoDBFileFmt + ReferenceDB[aminoHMM], AminoHmmDBFileFmt ) plugin.register_semantic_type_to_format( - ReferenceDB[dnaHMM], HmmAminoDBFileFmt + ReferenceDB[dnaHMM], DnaHmmDBFileFmt ) plugin.register_semantic_type_to_format( - ReferenceDB[rnaHMM], HmmAminoDBFileFmt + ReferenceDB[rnaHMM], RnaHmmDBFileFmt ) From 8acdb8971fe7999f8df01385557689f0874d8e1c Mon Sep 
17 00:00:00 2001 From: Santiago Castro Dau Date: Fri, 31 May 2024 17:11:32 +0200 Subject: [PATCH 08/28] migrate types and test data to new module. TODO migrate tests --- q2_types/hmmer/__init__.py | 24 +++ q2_types/hmmer/_format.py | 187 ++++++++++++++++++ q2_types/hmmer/_type.py | 89 +++++++++ .../tests/data/hmmer/bacteria/a.fa | 0 .../tests/data/hmmer/bacteria/b.fa | 0 .../tests/data/hmmer/bacteria/b2.fa | 0 .../data/hmmer/bacteria/bacteria.hmm.h3f | 0 .../data/hmmer/bacteria/bacteria.hmm.h3i | 0 .../data/hmmer/bacteria/bacteria.hmm.h3m | 0 .../data/hmmer/bacteria/bacteria.hmm.h3p | 0 .../data/hmmer/bacteria/bacteria.hmm.idmap | 0 .../data/hmmer/invalid_idmaps/1.hmm.idmap | 0 .../data/hmmer/invalid_idmaps/2.hmm.idmap | 0 .../data/hmmer/invalid_idmaps/3.hmm.idmap | 0 .../data/hmmer/invalid_idmaps/4.hmm.idmap | 0 q2_types/reference_db/__init__.py | 6 +- q2_types/reference_db/_format.py | 180 ----------------- q2_types/reference_db/_type.py | 23 +-- 18 files changed, 303 insertions(+), 206 deletions(-) create mode 100644 q2_types/hmmer/__init__.py create mode 100644 q2_types/hmmer/_format.py create mode 100644 q2_types/hmmer/_type.py rename q2_types/{reference_db => hmmer}/tests/data/hmmer/bacteria/a.fa (100%) rename q2_types/{reference_db => hmmer}/tests/data/hmmer/bacteria/b.fa (100%) rename q2_types/{reference_db => hmmer}/tests/data/hmmer/bacteria/b2.fa (100%) rename q2_types/{reference_db => hmmer}/tests/data/hmmer/bacteria/bacteria.hmm.h3f (100%) rename q2_types/{reference_db => hmmer}/tests/data/hmmer/bacteria/bacteria.hmm.h3i (100%) rename q2_types/{reference_db => hmmer}/tests/data/hmmer/bacteria/bacteria.hmm.h3m (100%) rename q2_types/{reference_db => hmmer}/tests/data/hmmer/bacteria/bacteria.hmm.h3p (100%) rename q2_types/{reference_db => hmmer}/tests/data/hmmer/bacteria/bacteria.hmm.idmap (100%) rename q2_types/{reference_db => hmmer}/tests/data/hmmer/invalid_idmaps/1.hmm.idmap (100%) rename q2_types/{reference_db => 
hmmer}/tests/data/hmmer/invalid_idmaps/2.hmm.idmap (100%) rename q2_types/{reference_db => hmmer}/tests/data/hmmer/invalid_idmaps/3.hmm.idmap (100%) rename q2_types/{reference_db => hmmer}/tests/data/hmmer/invalid_idmaps/4.hmm.idmap (100%) diff --git a/q2_types/hmmer/__init__.py b/q2_types/hmmer/__init__.py new file mode 100644 index 00000000..4ec67566 --- /dev/null +++ b/q2_types/hmmer/__init__.py @@ -0,0 +1,24 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- +from ._format import ( + AminoHmmMultipleProfilesFileFmt, DnaHmmMultipleProfilesFileFmt, + RnaHmmMultipleProfilesFileFmt, AminoHmmMultipleProfilesDirectoryFormat, + DnaHmmMultipleProfilesDirectoryFormat, + RnaHmmMultipleProfilesDirectoryFormat, + AminoHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt, + AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat +) + +__all__ = [ + "AminoHmmMultipleProfilesFileFmt", "DnaHmmMultipleProfilesFileFmt", + "RnaHmmMultipleProfilesFileFmt", "AminoHmmMultipleProfilesDirectoryFormat", + "DnaHmmMultipleProfilesDirectoryFormat", + "RnaHmmMultipleProfilesDirectoryFormat", + "AminoHmmFileFmt", "DnaHmmFileFmt", "RnaHmmFileFmt", + "AminoHmmDirectoryFormat", "DnaHmmDirectoryFormat", "RnaHmmDirectoryFormat" +] diff --git a/q2_types/hmmer/_format.py b/q2_types/hmmer/_format.py new file mode 100644 index 00000000..51de6459 --- /dev/null +++ b/q2_types/hmmer/_format.py @@ -0,0 +1,187 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. 
+# ---------------------------------------------------------------------------- +import re +from pyhmmer.plan7 import HMMFile +from qiime2.plugin import model +from qiime2.core.exceptions import ValidationError +from q2_types.plugin_setup import plugin +from q2_types.hmmer._type import ( + HMM, MultipleAminoProfilesPressed, MultipleDNAProfilesPressed, + MultipleRNAProfilesPressed +) + + +class HmmBinaryFileFmt(model.BinaryFileFormat): + def _validate_(self, level): + pass + + +class HmmIdmapFileFmt(model.TextFileFormat): + def _validate_(self, level): + with open(str(self), 'r') as file: + # Set the number of rows to be parsed + max_lines = {"min": 100, "max": 10000000}[level] + lines = file.readlines() + for i, line in enumerate(lines, 1): + # Check number of lines parsed so far + if i > max_lines: + break + + # Validate line + if not re.match(r'^(\d+) ([A-Z0-9]+)$', line): + raise ValidationError( + f"Invalid line {i}.\n" + f"{line} \n" + "Expected index and an alphanumeric code separated " + "by a single space." + ) + + # Check index is equal to line number + idx, code = line.rstrip("\n").split(sep=" ") + if not idx == str(i): + raise ValidationError( + f"Invalid line {i}.\n" + f"{line} \n" + f"Expected index {i} but got {idx} instead.\n" + ) + + +class BaseHmmPressedDirFmt(model.directory_format): + """ + The .h3m file contains the profile HMMs + and their annotation in a binary format. The .h3i file is an + SSI index for the .h3m file. The .h3f file contains + precomputed data structures for the fast heuristic filter + (the MSV filter). The .h3p file contains precomputed data + structures for the rest of each profile. 
+ """ + h3m = model.File(r'.*\.hmm\.h3m', format=HmmBinaryFileFmt) + h3i = model.File(r'.*\.hmm\.h3i', format=HmmBinaryFileFmt) + h3f = model.File(r'.*\.hmm\.h3f', format=HmmBinaryFileFmt) + h3p = model.File(r'.*\.hmm\.h3p', format=HmmBinaryFileFmt) + idmap = model.File( + r'.*\.hmm\.idmap', format=HmmIdmapFileFmt, optional=True + ) + + +plugin.register_semantic_type_to_format( + HMM[MultipleAminoProfilesPressed], BaseHmmPressedDirFmt +) + +plugin.register_semantic_type_to_format( + HMM[MultipleDNAProfilesPressed], BaseHmmPressedDirFmt +) + +plugin.register_semantic_type_to_format( + HMM[MultipleRNAProfilesPressed], BaseHmmPressedDirFmt +) + + +class HmmBaseFileFmt(model.TextFileFormat): + def _validate_file_fmt( + self, level: str, alphabet: str, single_profile: bool + ): + """ + Check http://eddylab.org/software/hmmer/Userguide.pdf + section "HMMER profile HMM files" for full description of + hmm file format. + """ + parse_n_profiles = {"min": 3, "max": None}[level] + tolerance = 0.0001 + + with HMMFile(str(self)) as hmm_file: + hmm_profiles = list(hmm_file) + + if len(hmm_profiles) > 1 and single_profile: + raise ValidationError( + f"Expected 1 profile, found {len(hmm_profiles)}." + ) + + for hmm_profile in hmm_profiles[:parse_n_profiles]: + hmm_profile.validate(tolerance=tolerance) + + if hmm_profile.alphabet.lower() != alphabet: + raise ValidationError( + "Found profile with alphabet: " + f"{hmm_profile.alph.lower()}\n" + f"{self.__class__} only accepts {alphabet} profiles." 
+ ) + + +class AminoHmmFileFmt(HmmBaseFileFmt): + alphabet = "amino" + + def _validate_(self, level): + self._validate_file_fmt(self, level, self.alphabet, True) + + +class DnaHmmFileFmt(HmmBaseFileFmt): + alphabet = "dna" + + def _validate_(self, level): + self._validate_file_fmt(self, level, self.alphabet, True) + + +class RnaHmmFileFmt(HmmBaseFileFmt): + alphabet = "rna" + + def _validate_(self, level): + self._validate_file_fmt(self, level, self.alphabet, True) + + +AminoHmmDirectoryFormat = model.SingleFileDirectoryFormat( + 'AminoHmmFileFmt', 'profile.hmm', AminoHmmFileFmt) + +DnaHmmDirectoryFormat = model.SingleFileDirectoryFormat( + 'DnaHmmFileFmt', 'profile.hmm', DnaHmmFileFmt) + +RnaHmmDirectoryFormat = model.SingleFileDirectoryFormat( + 'RnaHmmFileFmt', 'profile.hmm', RnaHmmFileFmt) + + +class AminoHmmMultipleProfilesFileFmt(AminoHmmFileFmt): + def _validate_(self, level): + self._validate_file_fmt(self, level, self.alphabet, False) + + +class DnaHmmMultipleProfilesFileFmt(DnaHmmFileFmt): + def _validate_(self, level): + self._validate_file_fmt(self, level, self.alphabet, False) + + +class RnaHmmMultipleProfilesFileFmt(RnaHmmFileFmt): + def _validate_(self, level): + self._validate_file_fmt(self, level, self.alphabet, False) + + +AminoHmmMultipleProfilesDirectoryFormat = model.SingleFileDirectoryFormat( + 'AminoHmmMultipleProfilesDirectoryFormat', + 'profile.hmm', + AminoHmmMultipleProfilesFileFmt +) + +DnaHmmMultipleProfilesDirectoryFormat = model.SingleFileDirectoryFormat( + 'DnaHmmMultipleProfilesDirectoryFormat', + 'profile.hmm', + DnaHmmMultipleProfilesFileFmt, +) + +RnaHmmMultipleProfilesDirectoryFormat = model.SingleFileDirectoryFormat( + 'RnaHmmMultipleProfilesDirectoryFormat', + 'profile.hmm', + RnaHmmMultipleProfilesFileFmt, +) + +plugin.register_formats( + AminoHmmMultipleProfilesFileFmt, DnaHmmMultipleProfilesFileFmt, + RnaHmmMultipleProfilesFileFmt, AminoHmmMultipleProfilesDirectoryFormat, + DnaHmmMultipleProfilesDirectoryFormat, + 
RnaHmmMultipleProfilesDirectoryFormat, + AminoHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt, + AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat +) diff --git a/q2_types/hmmer/_type.py b/q2_types/hmmer/_type.py new file mode 100644 index 00000000..5338bc0b --- /dev/null +++ b/q2_types/hmmer/_type.py @@ -0,0 +1,89 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- +from qiime2.plugin import SemanticType +from q2_types.plugin_setup import plugin +from . import ( + AminoHmmMultipleProfilesDirectoryFormat, + DnaHmmMultipleProfilesDirectoryFormat, + RnaHmmMultipleProfilesDirectoryFormat, + AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat +) + + +HMM = SemanticType('HMM', field_names='type') +SingleAminoProfile = SemanticType( + 'SingleAminoProfile', variant_of=HMM.field['type'] +) +SingleDNAProfile = SemanticType( + 'SingleDNAProfile', variant_of=HMM.field['type'] +) +SingleRNAProfile = SemanticType( + 'SingleRNAProfile', variant_of=HMM.field['type'] +) +MultipleAminoProfiles = SemanticType( + 'MultipleAminoProfiles', variant_of=HMM.field['type'] +) +MultipleDNAProfiles = SemanticType( + 'MultipleDNAProfiles', variant_of=HMM.field['type'] +) +MultipleRNAProfiles = SemanticType( + 'MultipleRNAProfiles', variant_of=HMM.field['type'] +) +MultipleAminoProfilesPressed = SemanticType( + 'MultipleAminoProfilesPressed', variant_of=HMM.field['type'] +) +MultipleDNAProfilesPressed = SemanticType( + 'MultipleDNAProfilesPressed', variant_of=HMM.field['type'] +) +MultipleRNAProfilesPressed = SemanticType( + 'MultipleRNAProfilesPressed', variant_of=HMM.field['type'] +) + +plugin.register_semantic_types( + HMM, + SingleAminoProfile, SingleDNAProfile, 
SingleRNAProfile, + MultipleAminoProfiles, MultipleDNAProfiles, MultipleRNAProfiles, + MultipleAminoProfilesPressed, MultipleDNAProfilesPressed, + MultipleRNAProfilesPressed +) + +plugin.register_artifact_class( + HMM[SingleAminoProfile], + directory_format=AminoHmmDirectoryFormat, + description=("TODO") +) + +plugin.register_artifact_class( + HMM[SingleDNAProfile], + directory_format=DnaHmmDirectoryFormat, + description=("TODO") +) + +plugin.register_artifact_class( + HMM[SingleDNAProfile], + directory_format=RnaHmmDirectoryFormat, + description=("TODO") +) + +plugin.register_artifact_class( + HMM[MultipleAminoProfiles], + directory_format=AminoHmmMultipleProfilesDirectoryFormat, + description=("TODO") +) + +plugin.register_artifact_class( + HMM[MultipleDNAProfiles], + directory_format=DnaHmmMultipleProfilesDirectoryFormat, + description=("TODO") +) + +plugin.register_artifact_class( + HMM[MultipleRNAProfiles], + directory_format=RnaHmmMultipleProfilesDirectoryFormat, + description=("TODO") +) diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/a.fa b/q2_types/hmmer/tests/data/hmmer/bacteria/a.fa similarity index 100% rename from q2_types/reference_db/tests/data/hmmer/bacteria/a.fa rename to q2_types/hmmer/tests/data/hmmer/bacteria/a.fa diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/b.fa b/q2_types/hmmer/tests/data/hmmer/bacteria/b.fa similarity index 100% rename from q2_types/reference_db/tests/data/hmmer/bacteria/b.fa rename to q2_types/hmmer/tests/data/hmmer/bacteria/b.fa diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/b2.fa b/q2_types/hmmer/tests/data/hmmer/bacteria/b2.fa similarity index 100% rename from q2_types/reference_db/tests/data/hmmer/bacteria/b2.fa rename to q2_types/hmmer/tests/data/hmmer/bacteria/b2.fa diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3f b/q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3f similarity index 100% rename from 
q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3f rename to q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3f diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3i b/q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3i similarity index 100% rename from q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3i rename to q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3i diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3m b/q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3m similarity index 100% rename from q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3m rename to q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3m diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3p b/q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3p similarity index 100% rename from q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.h3p rename to q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3p diff --git a/q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.idmap b/q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.idmap similarity index 100% rename from q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.idmap rename to q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.idmap diff --git a/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/1.hmm.idmap b/q2_types/hmmer/tests/data/hmmer/invalid_idmaps/1.hmm.idmap similarity index 100% rename from q2_types/reference_db/tests/data/hmmer/invalid_idmaps/1.hmm.idmap rename to q2_types/hmmer/tests/data/hmmer/invalid_idmaps/1.hmm.idmap diff --git a/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/2.hmm.idmap b/q2_types/hmmer/tests/data/hmmer/invalid_idmaps/2.hmm.idmap similarity index 100% rename from q2_types/reference_db/tests/data/hmmer/invalid_idmaps/2.hmm.idmap rename to q2_types/hmmer/tests/data/hmmer/invalid_idmaps/2.hmm.idmap diff --git 
a/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/3.hmm.idmap b/q2_types/hmmer/tests/data/hmmer/invalid_idmaps/3.hmm.idmap similarity index 100% rename from q2_types/reference_db/tests/data/hmmer/invalid_idmaps/3.hmm.idmap rename to q2_types/hmmer/tests/data/hmmer/invalid_idmaps/3.hmm.idmap diff --git a/q2_types/reference_db/tests/data/hmmer/invalid_idmaps/4.hmm.idmap b/q2_types/hmmer/tests/data/hmmer/invalid_idmaps/4.hmm.idmap similarity index 100% rename from q2_types/reference_db/tests/data/hmmer/invalid_idmaps/4.hmm.idmap rename to q2_types/hmmer/tests/data/hmmer/invalid_idmaps/4.hmm.idmap diff --git a/q2_types/reference_db/__init__.py b/q2_types/reference_db/__init__.py index d8a8b796..5bb6cf5a 100644 --- a/q2_types/reference_db/__init__.py +++ b/q2_types/reference_db/__init__.py @@ -10,7 +10,7 @@ from q2_types.reference_db._type import ( ReferenceDB, Diamond, Eggnog, NCBITaxonomy, - EggnogProteinSequences, HMMER + EggnogProteinSequences ) from q2_types.reference_db._format import ( @@ -21,15 +21,13 @@ DiamondDatabaseDirFmt, NCBITaxonomyDirFmt, EggnogProteinSequencesDirFmt, - HmmerDirFmt ) __all__ = [ 'ReferenceDB', 'Diamond', 'Eggnog', 'DiamondDatabaseFileFmt', 'DiamondDatabaseDirFmt', 'EggnogRefDirFmt', 'EggnogRefTextFileFmt', 'EggnogRefBinFileFmt', 'NCBITaxonomyDirFmt', 'NCBITaxonomy', - 'EggnogProteinSequencesDirFmt', 'EggnogProteinSequences', 'HMMER', - 'HmmerDirFmt' + 'EggnogProteinSequencesDirFmt', 'EggnogProteinSequences' ] importlib.import_module('q2_types.reference_db._format') diff --git a/q2_types/reference_db/_format.py b/q2_types/reference_db/_format.py index 7f80e02f..41a65c0c 100644 --- a/q2_types/reference_db/_format.py +++ b/q2_types/reference_db/_format.py @@ -7,13 +7,11 @@ # ---------------------------------------------------------------------------- import gzip import re -from pyhmmer.plan7 import HMMFile from qiime2.plugin import model from qiime2.core.exceptions import ValidationError from q2_types.plugin_setup import plugin from 
q2_types.reference_db._type import ( ReferenceDB, Eggnog, Diamond, NCBITaxonomy, EggnogProteinSequences, - aminoHMM, dnaHMM, rnaHMM, aminoHMMpressed, rnaHMMpressed, dnaHMMpressed ) from q2_types.feature_data import MixedCaseProteinFASTAFormat @@ -293,181 +291,3 @@ class EggnogProteinSequencesDirFmt(model.DirectoryFormat): plugin.register_formats(EggnogProteinSequencesDirFmt) plugin.register_semantic_type_to_format(ReferenceDB[EggnogProteinSequences], EggnogProteinSequencesDirFmt) - - -class HmmBinaryFileFmt(model.BinaryFileFormat): - def _validate_(self, level): - pass - - -class HmmIdmapFileFmt(model.TextFileFormat): - def _validate_(self, level): - with open(str(self), 'r') as file: - # Set the number of rows to be parsed - max_lines = {"min": 100, "max": 10000000}[level] - lines = file.readlines() - for i, line in enumerate(lines, 1): - # Check number of lines parsed so far - if i > max_lines: - break - - # Validate line - if not re.match(r'^(\d+) ([A-Z0-9]+)$', line): - raise ValidationError( - f"Invalid line {i}.\n" - f"{line} \n" - "Expected index and an alphanumeric code separated " - "by a single space." - ) - - # Check index is equal to line number - idx, code = line.rstrip("\n").split(sep=" ") - if not idx == str(i): - raise ValidationError( - f"Invalid line {i}.\n" - f"{line} \n" - f"Expected index {i} but got {idx} instead.\n" - ) - - -class BaseHmmPressedDirFmt(model.directory_format): - """ - The .h3m file contains the profile HMMs - and their annotation in a binary format. The .h3i file is an - SSI index for the .h3m file. The .h3f file contains - precomputed data structures for the fast heuristic filter - (the MSV filter). The .h3p file contains precomputed data - structures for the rest of each profile. 
- """ - h3m = model.File(r'.*\.hmm\.h3m', format=HmmBinaryFileFmt) - h3i = model.File(r'.*\.hmm\.h3i', format=HmmBinaryFileFmt) - h3f = model.File(r'.*\.hmm\.h3f', format=HmmBinaryFileFmt) - h3p = model.File(r'.*\.hmm\.h3p', format=HmmBinaryFileFmt) - idmap = model.File( - r'.*\.hmm\.idmap', format=HmmIdmapFileFmt, optional=True - ) - - -plugin.register_semantic_type_to_format( - ReferenceDB[aminoHMMpressed], BaseHmmPressedDirFmt -) - -plugin.register_semantic_type_to_format( - ReferenceDB[dnaHMMpressed], BaseHmmPressedDirFmt -) - -plugin.register_semantic_type_to_format( - ReferenceDB[rnaHMMpressed], BaseHmmPressedDirFmt -) - - -class HmmBaseFileFmt(model.TextFileFormat): - def _validate_file_fmt( - self, level: str, alphabet: str, single_profile: bool - ): - """ - Check http://eddylab.org/software/hmmer/Userguide.pdf - section "HMMER profile HMM files" for full description of - hmm file format. - """ - parse_n_profiles = {"min": 3, "max": None}[level] - tolerance = 0.0001 - - with HMMFile(str(self)) as hmm_file: - hmm_profiles = list(hmm_file) - - if len(hmm_profiles) > 1 and single_profile: - raise ValidationError( - f"Expected 1 profile, found {len(hmm_profiles)}." - ) - - for hmm_profile in hmm_profiles[:parse_n_profiles]: - hmm_profile.validate(tolerance=tolerance) - - if hmm_profile.alphabet.lower() != alphabet: - raise ValidationError( - "Found profile with alphabet: " - f"{hmm_profile.alph.lower()}\n" - f"{self.__class__} only accepts {alphabet} profiles." 
- ) - - -class AminoHmmFileFmt(HmmBaseFileFmt): - alphabet = "amino" - - def _validate_(self, level): - self._validate_file_fmt(self, level, self.alphabet, True) - - -class DnaHmmFileFmt(HmmBaseFileFmt): - alphabet = "dna" - - def _validate_(self, level): - self._validate_file_fmt(self, level, self.alphabet, True) - - -class RnaHmmFileFmt(HmmBaseFileFmt): - alphabet = "rna" - - def _validate_(self, level): - self._validate_file_fmt(self, level, self.alphabet, True) - - -AminoHmmDirectoryFormat = model.SingleFileDirectoryFormat( - 'AminoHmmFileFmt', 'profile.hmm', AminoHmmFileFmt) - -DnaHmmDirectoryFormat = model.SingleFileDirectoryFormat( - 'DnaHmmFileFmt', 'profile.hmm', DnaHmmFileFmt) - -RnaHmmDirectoryFormat = model.SingleFileDirectoryFormat( - 'RnaHmmFileFmt', 'profile.hmm', RnaHmmFileFmt) - -plugin.register_formats( - AminoHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt, - AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat -) - - -class AminoHmmDBFileFmt(AminoHmmFileFmt): - def _validate_(self, level): - self._validate_file_fmt(self, level, self.alphabet, False) - - -class DnaHmmDBFileFmt(DnaHmmFileFmt): - def _validate_(self, level): - self._validate_file_fmt(self, level, self.alphabet, False) - - -class RnaHmmDBFileFmt(RnaHmmFileFmt): - def _validate_(self, level): - self._validate_file_fmt(self, level, self.alphabet, False) - - -AminoHmmDbDirectoryFormat = model.SingleFileDirectoryFormat( - 'HmmAminoDBFileFmt', 'profile.hmm', AminoHmmDBFileFmt -) - -DnaHmmDbDirectoryFormat = model.SingleFileDirectoryFormat( - 'HmmDnaDBFileFmt', 'profile.hmm', DnaHmmDBFileFmt -) - -RnaHmmDbDirectoryFormat = model.SingleFileDirectoryFormat( - 'HmmRnaDBFileFmt', 'profile.hmm', RnaHmmDBFileFmt -) - -plugin.register_formats( - AminoHmmDbDirectoryFormat, DnaHmmDbDirectoryFormat, AminoHmmDBFileFmt, - RnaHmmDbDirectoryFormat, DnaHmmDBFileFmt, RnaHmmDBFileFmt -) - -plugin.register_semantic_type_to_format( - ReferenceDB[aminoHMM], AminoHmmDBFileFmt -) - 
-plugin.register_semantic_type_to_format( - ReferenceDB[dnaHMM], DnaHmmDBFileFmt -) - -plugin.register_semantic_type_to_format( - ReferenceDB[rnaHMM], RnaHmmDBFileFmt -) diff --git a/q2_types/reference_db/_type.py b/q2_types/reference_db/_type.py index d694d95e..162ed993 100644 --- a/q2_types/reference_db/_type.py +++ b/q2_types/reference_db/_type.py @@ -5,8 +5,6 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- - - from qiime2.plugin import SemanticType from q2_types.plugin_setup import plugin @@ -19,25 +17,6 @@ EggnogProteinSequences = SemanticType( 'EggnogProteinSequences', variant_of=ReferenceDB.field['type'] ) -aminoHMM = SemanticType( - 'aminoHMM', variant_of=ReferenceDB.field['type'] -) -dnaHMM = SemanticType( - 'dnaHMM', variant_of=ReferenceDB.field['type'] -) -rnaHMM = SemanticType( - 'rnaHMM', variant_of=ReferenceDB.field['type'] -) -aminoHMMpressed = SemanticType( - 'aminoHMMpressed', variant_of=ReferenceDB.field['type'] -) -dnaHMMpressed = SemanticType( - 'dnaHMMpressed', variant_of=ReferenceDB.field['type'] -) -rnaHMMpressed = SemanticType( - 'rnaHMMpressed', variant_of=ReferenceDB.field['type'] -) plugin.register_semantic_types( - ReferenceDB, Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences, - aminoHMM, dnaHMM, rnaHMM, aminoHMMpressed, rnaHMMpressed, dnaHMMpressed + Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences, ) From f95358e91946e5e6e917070e38737698f7a12b20 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Fri, 31 May 2024 17:17:28 +0200 Subject: [PATCH 09/28] remove dif from irrelevant files --- q2_types/reference_db/__init__.py | 14 ++++++-------- q2_types/reference_db/_format.py | 3 ++- q2_types/reference_db/_type.py | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/q2_types/reference_db/__init__.py b/q2_types/reference_db/__init__.py index 5bb6cf5a..04b3bf6b 100644 --- 
a/q2_types/reference_db/__init__.py +++ b/q2_types/reference_db/__init__.py @@ -20,15 +20,13 @@ DiamondDatabaseFileFmt, DiamondDatabaseDirFmt, NCBITaxonomyDirFmt, - EggnogProteinSequencesDirFmt, - ) + EggnogProteinSequencesDirFmt +) -__all__ = [ - 'ReferenceDB', 'Diamond', 'Eggnog', 'DiamondDatabaseFileFmt', - 'DiamondDatabaseDirFmt', 'EggnogRefDirFmt', 'EggnogRefTextFileFmt', - 'EggnogRefBinFileFmt', 'NCBITaxonomyDirFmt', 'NCBITaxonomy', - 'EggnogProteinSequencesDirFmt', 'EggnogProteinSequences' -] +__all__ = ['ReferenceDB', 'Diamond', 'Eggnog', 'DiamondDatabaseFileFmt', + 'DiamondDatabaseDirFmt', 'EggnogRefDirFmt', 'EggnogRefTextFileFmt', + 'EggnogRefBinFileFmt', 'NCBITaxonomyDirFmt', 'NCBITaxonomy', + 'EggnogProteinSequencesDirFmt', 'EggnogProteinSequences'] importlib.import_module('q2_types.reference_db._format') importlib.import_module('q2_types.reference_db._type') diff --git a/q2_types/reference_db/_format.py b/q2_types/reference_db/_format.py index 41a65c0c..3d5a8267 100644 --- a/q2_types/reference_db/_format.py +++ b/q2_types/reference_db/_format.py @@ -11,7 +11,8 @@ from qiime2.core.exceptions import ValidationError from q2_types.plugin_setup import plugin from q2_types.reference_db._type import ( - ReferenceDB, Eggnog, Diamond, NCBITaxonomy, EggnogProteinSequences, + ReferenceDB, Eggnog, Diamond, NCBITaxonomy, + EggnogProteinSequences ) from q2_types.feature_data import MixedCaseProteinFASTAFormat diff --git a/q2_types/reference_db/_type.py b/q2_types/reference_db/_type.py index 162ed993..0d2c88b7 100644 --- a/q2_types/reference_db/_type.py +++ b/q2_types/reference_db/_type.py @@ -18,5 +18,5 @@ 'EggnogProteinSequences', variant_of=ReferenceDB.field['type'] ) plugin.register_semantic_types( - Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences, + ReferenceDB, Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences ) From 9c8447f1f9b9cdff7fdcc2ed06c74b3bcda65e52 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Fri, 31 May 2024 17:19:36 +0200 
Subject: [PATCH 10/28] same as las commit --- q2_types/reference_db/__init__.py | 2 +- q2_types/reference_db/_format.py | 2 ++ q2_types/reference_db/_type.py | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/q2_types/reference_db/__init__.py b/q2_types/reference_db/__init__.py index 04b3bf6b..be66ab92 100644 --- a/q2_types/reference_db/__init__.py +++ b/q2_types/reference_db/__init__.py @@ -21,7 +21,7 @@ DiamondDatabaseDirFmt, NCBITaxonomyDirFmt, EggnogProteinSequencesDirFmt -) + ) __all__ = ['ReferenceDB', 'Diamond', 'Eggnog', 'DiamondDatabaseFileFmt', 'DiamondDatabaseDirFmt', 'EggnogRefDirFmt', 'EggnogRefTextFileFmt', diff --git a/q2_types/reference_db/_format.py b/q2_types/reference_db/_format.py index 3d5a8267..ad6697f2 100644 --- a/q2_types/reference_db/_format.py +++ b/q2_types/reference_db/_format.py @@ -5,6 +5,8 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- + + import gzip import re from qiime2.plugin import model diff --git a/q2_types/reference_db/_type.py b/q2_types/reference_db/_type.py index 0d2c88b7..0f773a79 100644 --- a/q2_types/reference_db/_type.py +++ b/q2_types/reference_db/_type.py @@ -5,6 +5,8 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- + + from qiime2.plugin import SemanticType from q2_types.plugin_setup import plugin @@ -17,6 +19,7 @@ EggnogProteinSequences = SemanticType( 'EggnogProteinSequences', variant_of=ReferenceDB.field['type'] ) + plugin.register_semantic_types( ReferenceDB, Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences ) From 90bdde4dadbb8046f52bb014d1619e8ff3518a06 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Fri, 31 May 2024 17:36:13 +0200 Subject: [PATCH 11/28] more changes to reduce diff. 
TODO: adjust tests --- q2_types/hmmer/__init__.py | 6 +- q2_types/hmmer/tests/test_format.py | 89 ++++++++++++++++++++++ q2_types/hmmer/tests/test_type.py | 21 +++++ q2_types/reference_db/tests/test_format.py | 86 ++------------------- q2_types/reference_db/tests/test_type.py | 13 +--- 5 files changed, 122 insertions(+), 93 deletions(-) create mode 100644 q2_types/hmmer/tests/test_format.py create mode 100644 q2_types/hmmer/tests/test_type.py diff --git a/q2_types/hmmer/__init__.py b/q2_types/hmmer/__init__.py index 4ec67566..f48c0d1e 100644 --- a/q2_types/hmmer/__init__.py +++ b/q2_types/hmmer/__init__.py @@ -13,6 +13,9 @@ AminoHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt, AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat ) +from ._type import ( + HMM +) __all__ = [ "AminoHmmMultipleProfilesFileFmt", "DnaHmmMultipleProfilesFileFmt", @@ -20,5 +23,6 @@ "DnaHmmMultipleProfilesDirectoryFormat", "RnaHmmMultipleProfilesDirectoryFormat", "AminoHmmFileFmt", "DnaHmmFileFmt", "RnaHmmFileFmt", - "AminoHmmDirectoryFormat", "DnaHmmDirectoryFormat", "RnaHmmDirectoryFormat" + "AminoHmmDirectoryFormat", "DnaHmmDirectoryFormat", + "RnaHmmDirectoryFormat", "HMM" ] diff --git a/q2_types/hmmer/tests/test_format.py b/q2_types/hmmer/tests/test_format.py new file mode 100644 index 00000000..6672f96b --- /dev/null +++ b/q2_types/hmmer/tests/test_format.py @@ -0,0 +1,89 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. 
+# ---------------------------------------------------------------------------- +import tempfile +import shutil +import os +from qiime2.plugin.testing import TestPluginBase +from q2_types.hmmer._format import HmmIdmapFileFmt +from qiime2.plugin import ValidationError + + +class TestRefFormats(TestPluginBase): + package = 'q2_types.hmm.tests' + + def test_HmmerDirFmt_valid(self): + fmt = ...(self.get_data_path("hmmer/bacteria"), 'r') + fmt.validate() + + def test_HmmerDirFmt_invalid_idmap_1(self): + fmt = HmmIdmapFileFmt(self.get_data_path( + "hmmer/invalid_idmaps/1.hmm.idmap"), 'r' + ) + with self.assertRaisesRegex( + ValidationError, + "Expected index and an alphanumeric code separated " + "by a single space." + ): + fmt.validate(level="min") + + def test_HmmerDirFmt_invalid_idmap_2(self): + fmt = HmmIdmapFileFmt(self.get_data_path( + "hmmer/invalid_idmaps/2.hmm.idmap"), 'r' + ) + with self.assertRaisesRegex( + ValidationError, + "Expected index and an alphanumeric code separated " + "by a single space." + ): + fmt.validate(level="min") + + def test_HmmerDirFmt_invalid_idmap_3(self): + fmt = HmmIdmapFileFmt(self.get_data_path( + "hmmer/invalid_idmaps/3.hmm.idmap"), 'r' + ) + with self.assertRaisesRegex( + ValidationError, + 'Expected index' + ): + fmt.validate(level="min") + + def test_HmmerDirFmt_invalid_idmap_4(self): + fmt = HmmIdmapFileFmt(self.get_data_path( + "hmmer/invalid_idmaps/4.hmm.idmap"), 'r' + ) + with self.assertRaisesRegex( + ValidationError, + "Expected index and an alphanumeric code separated " + "by a single space." 
+ ): + fmt.validate(level="min") + + def test_HmmerDirFmt_missing_hmm(self): + with tempfile.TemporaryDirectory() as tmp: + shutil.copytree( + self.get_data_path("hmmer/bacteria"), tmp, dirs_exist_ok=True + ) + os.remove(f"{tmp}/bacteria.hmm.h3f") + fmt = ...(tmp, 'r') + with self.assertRaisesRegex( + ValidationError, "Missing one or more files" + ): + fmt.validate(level="min") + + def test_HmmerDirFmt_missing_fa(self): + with tempfile.TemporaryDirectory() as tmp: + shutil.copytree( + self.get_data_path("hmmer/bacteria"), tmp, dirs_exist_ok=True + ) + for file in ["a", "b", "b2"]: + os.remove(f"{tmp}/{file}.fa") + fmt = ...(tmp, 'r') + with self.assertRaisesRegex( + ValidationError, "Missing one or more files" + ): + fmt.validate(level="min") diff --git a/q2_types/hmmer/tests/test_type.py b/q2_types/hmmer/tests/test_type.py new file mode 100644 index 00000000..f5c4c67d --- /dev/null +++ b/q2_types/hmmer/tests/test_type.py @@ -0,0 +1,21 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- +from qiime2.plugin.testing import TestPluginBase +from . import HMM + + +class TestHMMType(TestPluginBase): + package = 'q2_types.reference_db.tests' + + def test_hmmer_registration(self): + self.assertRegisteredSemanticType(...) + + def test_HMMER_semantic_type_registered_to_DirFmt(self): + self.assertSemanticTypeRegisteredToFormat( + HMM[...], ... + ) diff --git a/q2_types/reference_db/tests/test_format.py b/q2_types/reference_db/tests/test_format.py index a9ef27d9..105b27da 100644 --- a/q2_types/reference_db/tests/test_format.py +++ b/q2_types/reference_db/tests/test_format.py @@ -5,17 +5,13 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- -import tempfile -import shutil -import os from qiime2.plugin.testing import TestPluginBase from q2_types.reference_db._format import ( - DiamondDatabaseFileFmt, DiamondDatabaseDirFmt, EggnogRefBinFileFmt, - EggnogRefDirFmt, NCBITaxonomyNamesFormat, NCBITaxonomyNodesFormat, - NCBITaxonomyDirFmt, NCBITaxonomyBinaryFileFmt, - EggnogProteinSequencesDirFmt, EggnogRefTextFileFmt, HmmerDirFmt, - HmmerIdmapFileFmt -) + DiamondDatabaseFileFmt, DiamondDatabaseDirFmt, EggnogRefBinFileFmt, + EggnogRefDirFmt, NCBITaxonomyNamesFormat, NCBITaxonomyNodesFormat, + NCBITaxonomyDirFmt, NCBITaxonomyBinaryFileFmt, + EggnogProteinSequencesDirFmt, EggnogRefTextFileFmt + ) from qiime2.plugin import ValidationError @@ -155,78 +151,6 @@ def test_EggnogRefTextFileFmt_invalid_taxid_lineage(self): ): fmt_obj.validate() - def test_HmmerDirFmt_valid(self): - fmt = HmmerDirFmt(self.get_data_path("hmmer/bacteria"), 'r') - fmt.validate() - - def test_HmmerDirFmt_invalid_idmap_1(self): - fmt = HmmerIdmapFileFmt(self.get_data_path( - "hmmer/invalid_idmaps/1.hmm.idmap"), 'r' - ) - with self.assertRaisesRegex( - ValidationError, - "Expected index and an alphanumeric code separated " - "by a single space." - ): - fmt.validate(level="min") - - def test_HmmerDirFmt_invalid_idmap_2(self): - fmt = HmmerIdmapFileFmt(self.get_data_path( - "hmmer/invalid_idmaps/2.hmm.idmap"), 'r' - ) - with self.assertRaisesRegex( - ValidationError, - "Expected index and an alphanumeric code separated " - "by a single space." 
- ): - fmt.validate(level="min") - - def test_HmmerDirFmt_invalid_idmap_3(self): - fmt = HmmerIdmapFileFmt(self.get_data_path( - "hmmer/invalid_idmaps/3.hmm.idmap"), 'r' - ) - with self.assertRaisesRegex( - ValidationError, - 'Expected index' - ): - fmt.validate(level="min") - - def test_HmmerDirFmt_invalid_idmap_4(self): - fmt = HmmerIdmapFileFmt(self.get_data_path( - "hmmer/invalid_idmaps/4.hmm.idmap"), 'r' - ) - with self.assertRaisesRegex( - ValidationError, - "Expected index and an alphanumeric code separated " - "by a single space." - ): - fmt.validate(level="min") - - def test_HmmerDirFmt_missing_hmm(self): - with tempfile.TemporaryDirectory() as tmp: - shutil.copytree( - self.get_data_path("hmmer/bacteria"), tmp, dirs_exist_ok=True - ) - os.remove(f"{tmp}/bacteria.hmm.h3f") - fmt = HmmerDirFmt(tmp, 'r') - with self.assertRaisesRegex( - ValidationError, "Missing one or more files" - ): - fmt.validate(level="min") - - def test_HmmerDirFmt_missing_fa(self): - with tempfile.TemporaryDirectory() as tmp: - shutil.copytree( - self.get_data_path("hmmer/bacteria"), tmp, dirs_exist_ok=True - ) - for file in ["a", "b", "b2"]: - os.remove(f"{tmp}/{file}.fa") - fmt = HmmerDirFmt(tmp, 'r') - with self.assertRaisesRegex( - ValidationError, "Missing one or more files" - ): - fmt.validate(level="min") - class TestNCBIFormats(TestPluginBase): package = "q2_types.reference_db.tests" diff --git a/q2_types/reference_db/tests/test_type.py b/q2_types/reference_db/tests/test_type.py index eb25f0f1..01d3a44e 100644 --- a/q2_types/reference_db/tests/test_type.py +++ b/q2_types/reference_db/tests/test_type.py @@ -10,11 +10,10 @@ from q2_types.reference_db._format import ( DiamondDatabaseDirFmt, EggnogRefDirFmt, NCBITaxonomyDirFmt, - EggnogProteinSequencesDirFmt, HmmerDirFmt + EggnogProteinSequencesDirFmt ) from q2_types.reference_db._type import ( - ReferenceDB, Diamond, Eggnog, NCBITaxonomy, EggnogProteinSequences, - HMMER + ReferenceDB, Diamond, Eggnog, NCBITaxonomy, 
EggnogProteinSequences ) @@ -55,11 +54,3 @@ def test_EggnogProteinSequences_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( ReferenceDB[EggnogProteinSequences], EggnogProteinSequencesDirFmt) - - def test_hmmer_registration(self): - self.assertRegisteredSemanticType(HMMER) - - def test_HMMER_semantic_type_registered_to_DirFmt(self): - self.assertSemanticTypeRegisteredToFormat( - ReferenceDB[HMMER], HmmerDirFmt - ) From cd7ecff93fe6fb0c97bc6edaaf7e46e1c33a70aa Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Fri, 31 May 2024 17:54:24 +0200 Subject: [PATCH 12/28] fix registrations. work in progress --- q2_types/__init__.py | 1 + q2_types/hmmer/__init__.py | 6 ++++-- q2_types/hmmer/_format.py | 19 +------------------ q2_types/hmmer/_type.py | 20 ++++++++++++++++++-- 4 files changed, 24 insertions(+), 22 deletions(-) diff --git a/q2_types/__init__.py b/q2_types/__init__.py index 8b8c2e2c..d78d5d56 100644 --- a/q2_types/__init__.py +++ b/q2_types/__init__.py @@ -31,3 +31,4 @@ importlib.import_module('q2_types.genome_data') importlib.import_module('q2_types.kaiju') importlib.import_module('q2_types.reference_db') +importlib.import_module('q2_types.hmmer') diff --git a/q2_types/hmmer/__init__.py b/q2_types/hmmer/__init__.py index f48c0d1e..df5bee42 100644 --- a/q2_types/hmmer/__init__.py +++ b/q2_types/hmmer/__init__.py @@ -6,8 +6,10 @@ # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- from ._format import ( - AminoHmmMultipleProfilesFileFmt, DnaHmmMultipleProfilesFileFmt, - RnaHmmMultipleProfilesFileFmt, AminoHmmMultipleProfilesDirectoryFormat, + AminoHmmMultipleProfilesFileFmt, + DnaHmmMultipleProfilesFileFmt, + RnaHmmMultipleProfilesFileFmt, + AminoHmmMultipleProfilesDirectoryFormat, DnaHmmMultipleProfilesDirectoryFormat, RnaHmmMultipleProfilesDirectoryFormat, AminoHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt, diff --git a/q2_types/hmmer/_format.py b/q2_types/hmmer/_format.py index 51de6459..818077cc 100644 --- a/q2_types/hmmer/_format.py +++ b/q2_types/hmmer/_format.py @@ -10,10 +10,6 @@ from qiime2.plugin import model from qiime2.core.exceptions import ValidationError from q2_types.plugin_setup import plugin -from q2_types.hmmer._type import ( - HMM, MultipleAminoProfilesPressed, MultipleDNAProfilesPressed, - MultipleRNAProfilesPressed -) class HmmBinaryFileFmt(model.BinaryFileFormat): @@ -51,7 +47,7 @@ def _validate_(self, level): ) -class BaseHmmPressedDirFmt(model.directory_format): +class BaseHmmPressedDirFmt(model.DirectoryFormat): """ The .h3m file contains the profile HMMs and their annotation in a binary format. 
The .h3i file is an @@ -69,19 +65,6 @@ class BaseHmmPressedDirFmt(model.directory_format): ) -plugin.register_semantic_type_to_format( - HMM[MultipleAminoProfilesPressed], BaseHmmPressedDirFmt -) - -plugin.register_semantic_type_to_format( - HMM[MultipleDNAProfilesPressed], BaseHmmPressedDirFmt -) - -plugin.register_semantic_type_to_format( - HMM[MultipleRNAProfilesPressed], BaseHmmPressedDirFmt -) - - class HmmBaseFileFmt(model.TextFileFormat): def _validate_file_fmt( self, level: str, alphabet: str, single_profile: bool diff --git a/q2_types/hmmer/_type.py b/q2_types/hmmer/_type.py index 5338bc0b..21e18a96 100644 --- a/q2_types/hmmer/_type.py +++ b/q2_types/hmmer/_type.py @@ -7,15 +7,31 @@ # ---------------------------------------------------------------------------- from qiime2.plugin import SemanticType from q2_types.plugin_setup import plugin -from . import ( +from q2_types.hmmer._format import ( AminoHmmMultipleProfilesDirectoryFormat, DnaHmmMultipleProfilesDirectoryFormat, RnaHmmMultipleProfilesDirectoryFormat, - AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat + AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat, + MultipleAminoProfilesPressed, + MultipleDNAProfilesPressed, + MultipleRNAProfilesPressed, + BaseHmmPressedDirFmt ) HMM = SemanticType('HMM', field_names='type') +plugin.register_semantic_type_to_format( + HMM[MultipleAminoProfilesPressed], BaseHmmPressedDirFmt +) + +plugin.register_semantic_type_to_format( + HMM[MultipleDNAProfilesPressed], BaseHmmPressedDirFmt +) + +plugin.register_semantic_type_to_format( + HMM[MultipleRNAProfilesPressed], BaseHmmPressedDirFmt +) + SingleAminoProfile = SemanticType( 'SingleAminoProfile', variant_of=HMM.field['type'] ) From 5453694dd21c7548befe25c9490cdd6e257932e1 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Mon, 3 Jun 2024 11:36:28 +0200 Subject: [PATCH 13/28] add descriptions and fix circular import --- q2_types/hmmer/_format.py | 1 - q2_types/hmmer/_type.py 
| 73 +++++++++++++++++++++++++++------------ 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/q2_types/hmmer/_format.py b/q2_types/hmmer/_format.py index 818077cc..8529ad0c 100644 --- a/q2_types/hmmer/_format.py +++ b/q2_types/hmmer/_format.py @@ -165,6 +165,5 @@ def _validate_(self, level): RnaHmmMultipleProfilesFileFmt, AminoHmmMultipleProfilesDirectoryFormat, DnaHmmMultipleProfilesDirectoryFormat, RnaHmmMultipleProfilesDirectoryFormat, - AminoHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt, AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat ) diff --git a/q2_types/hmmer/_type.py b/q2_types/hmmer/_type.py index 21e18a96..6439bb84 100644 --- a/q2_types/hmmer/_type.py +++ b/q2_types/hmmer/_type.py @@ -12,26 +12,11 @@ DnaHmmMultipleProfilesDirectoryFormat, RnaHmmMultipleProfilesDirectoryFormat, AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat, - MultipleAminoProfilesPressed, - MultipleDNAProfilesPressed, - MultipleRNAProfilesPressed, BaseHmmPressedDirFmt ) HMM = SemanticType('HMM', field_names='type') -plugin.register_semantic_type_to_format( - HMM[MultipleAminoProfilesPressed], BaseHmmPressedDirFmt -) - -plugin.register_semantic_type_to_format( - HMM[MultipleDNAProfilesPressed], BaseHmmPressedDirFmt -) - -plugin.register_semantic_type_to_format( - HMM[MultipleRNAProfilesPressed], BaseHmmPressedDirFmt -) - SingleAminoProfile = SemanticType( 'SingleAminoProfile', variant_of=HMM.field['type'] ) @@ -59,6 +44,32 @@ MultipleRNAProfilesPressed = SemanticType( 'MultipleRNAProfilesPressed', variant_of=HMM.field['type'] ) +plugin.register_artifact_class( + HMM[MultipleAminoProfilesPressed], + directory_format=BaseHmmPressedDirFmt, + description=( + "A collection of Hidden Markov Model profiles for amino acid " + "sequences in binary format and indexed." 
+ ) +) + +plugin.register_artifact_class( + HMM[MultipleDNAProfilesPressed], + directory_format=BaseHmmPressedDirFmt, + description=( + "A collection of Hidden Markov Model profiles for DNA " + "sequences in binary format and indexed." + ) +) + +plugin.register_artifact_class( + HMM[MultipleRNAProfilesPressed], + directory_format=BaseHmmPressedDirFmt, + description=( + "A collection of Hidden Markov Model profiles for RNA " + "sequences in binary format and indexed." + ) +) plugin.register_semantic_types( HMM, @@ -71,35 +82,53 @@ plugin.register_artifact_class( HMM[SingleAminoProfile], directory_format=AminoHmmDirectoryFormat, - description=("TODO") + description=( + "One single Hidden Markov Model profile, representing a group " + "of related proteins." + ) ) plugin.register_artifact_class( HMM[SingleDNAProfile], directory_format=DnaHmmDirectoryFormat, - description=("TODO") + description=( + "One single Hidden Markov Model profile, representing a group " + "of related DNA sequences." + ) ) plugin.register_artifact_class( - HMM[SingleDNAProfile], + HMM[SingleRNAProfile], directory_format=RnaHmmDirectoryFormat, - description=("TODO") + description=( + "One single Hidden Markov Model profile, representing a group " + "of related RNA sequences." + ) ) plugin.register_artifact_class( HMM[MultipleAminoProfiles], directory_format=AminoHmmMultipleProfilesDirectoryFormat, - description=("TODO") + description=( + "A collection of Hidden Markov Model profiles, each representing a " + "group of related proteins." + ) ) plugin.register_artifact_class( HMM[MultipleDNAProfiles], directory_format=DnaHmmMultipleProfilesDirectoryFormat, - description=("TODO") + description=( + "A collection of Hidden Markov Model profiles, each representing a " + "group of related DNA sequences." 
+ ) ) plugin.register_artifact_class( HMM[MultipleRNAProfiles], directory_format=RnaHmmMultipleProfilesDirectoryFormat, - description=("TODO") + description=( + "A collection of Hidden Markov Model profiles, each representing a " + "group of related RNA sequences." + ) ) From 9b1a8c1379728a123e40990ea6e26c27662aca1f Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Mon, 3 Jun 2024 11:57:40 +0200 Subject: [PATCH 14/28] remove word profile --- q2_types/hmmer/_type.py | 62 ++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/q2_types/hmmer/_type.py b/q2_types/hmmer/_type.py index 6439bb84..163b9be3 100644 --- a/q2_types/hmmer/_type.py +++ b/q2_types/hmmer/_type.py @@ -17,35 +17,35 @@ HMM = SemanticType('HMM', field_names='type') -SingleAminoProfile = SemanticType( - 'SingleAminoProfile', variant_of=HMM.field['type'] +SingleAmino = SemanticType( + 'SingleAmino', variant_of=HMM.field['type'] ) -SingleDNAProfile = SemanticType( - 'SingleDNAProfile', variant_of=HMM.field['type'] +SingleDNA = SemanticType( + 'SingleDNA', variant_of=HMM.field['type'] ) -SingleRNAProfile = SemanticType( - 'SingleRNAProfile', variant_of=HMM.field['type'] +SingleRNA = SemanticType( + 'SingleRNA', variant_of=HMM.field['type'] ) -MultipleAminoProfiles = SemanticType( - 'MultipleAminoProfiles', variant_of=HMM.field['type'] +MultipleAmino = SemanticType( + 'MultipleAmino', variant_of=HMM.field['type'] ) -MultipleDNAProfiles = SemanticType( - 'MultipleDNAProfiles', variant_of=HMM.field['type'] +MultipleDNA = SemanticType( + 'MultipleDNA', variant_of=HMM.field['type'] ) -MultipleRNAProfiles = SemanticType( - 'MultipleRNAProfiles', variant_of=HMM.field['type'] +MultipleRNA = SemanticType( + 'MultipleRNA', variant_of=HMM.field['type'] ) -MultipleAminoProfilesPressed = SemanticType( - 'MultipleAminoProfilesPressed', variant_of=HMM.field['type'] +MultipleAminoPressed = SemanticType( + 'MultipleAminoPressed', variant_of=HMM.field['type'] ) 
-MultipleDNAProfilesPressed = SemanticType( - 'MultipleDNAProfilesPressed', variant_of=HMM.field['type'] +MultipleDNAPressed = SemanticType( + 'MultipleDNAPressed', variant_of=HMM.field['type'] ) -MultipleRNAProfilesPressed = SemanticType( - 'MultipleRNAProfilesPressed', variant_of=HMM.field['type'] +MultipleRNAPressed = SemanticType( + 'MultipleRNAPressed', variant_of=HMM.field['type'] ) plugin.register_artifact_class( - HMM[MultipleAminoProfilesPressed], + HMM[MultipleAminoPressed], directory_format=BaseHmmPressedDirFmt, description=( "A collection of Hidden Markov Model profiles for amino acid " @@ -54,7 +54,7 @@ ) plugin.register_artifact_class( - HMM[MultipleDNAProfilesPressed], + HMM[MultipleDNAPressed], directory_format=BaseHmmPressedDirFmt, description=( "A collection of Hidden Markov Model profiles for DNA " @@ -63,7 +63,7 @@ ) plugin.register_artifact_class( - HMM[MultipleRNAProfilesPressed], + HMM[MultipleRNAPressed], directory_format=BaseHmmPressedDirFmt, description=( "A collection of Hidden Markov Model profiles for RNA " @@ -73,14 +73,14 @@ plugin.register_semantic_types( HMM, - SingleAminoProfile, SingleDNAProfile, SingleRNAProfile, - MultipleAminoProfiles, MultipleDNAProfiles, MultipleRNAProfiles, - MultipleAminoProfilesPressed, MultipleDNAProfilesPressed, - MultipleRNAProfilesPressed + SingleAmino, SingleDNA, SingleRNA, + MultipleAmino, MultipleDNA, MultipleRNA, + MultipleAminoPressed, MultipleDNAPressed, + MultipleRNAPressed ) plugin.register_artifact_class( - HMM[SingleAminoProfile], + HMM[SingleAmino], directory_format=AminoHmmDirectoryFormat, description=( "One single Hidden Markov Model profile, representing a group " @@ -89,7 +89,7 @@ ) plugin.register_artifact_class( - HMM[SingleDNAProfile], + HMM[SingleDNA], directory_format=DnaHmmDirectoryFormat, description=( "One single Hidden Markov Model profile, representing a group " @@ -98,7 +98,7 @@ ) plugin.register_artifact_class( - HMM[SingleRNAProfile], + HMM[SingleRNA], 
directory_format=RnaHmmDirectoryFormat, description=( "One single Hidden Markov Model profile, representing a group " @@ -107,7 +107,7 @@ ) plugin.register_artifact_class( - HMM[MultipleAminoProfiles], + HMM[MultipleAmino], directory_format=AminoHmmMultipleProfilesDirectoryFormat, description=( "A collection of Hidden Markov Model profiles, each representing a " @@ -116,7 +116,7 @@ ) plugin.register_artifact_class( - HMM[MultipleDNAProfiles], + HMM[MultipleDNA], directory_format=DnaHmmMultipleProfilesDirectoryFormat, description=( "A collection of Hidden Markov Model profiles, each representing a " @@ -125,7 +125,7 @@ ) plugin.register_artifact_class( - HMM[MultipleRNAProfiles], + HMM[MultipleRNA], directory_format=RnaHmmMultipleProfilesDirectoryFormat, description=( "A collection of Hidden Markov Model profiles, each representing a " From f19fb3b98384aa93d4ad7cd95481370bede0d7cc Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Mon, 3 Jun 2024 15:23:36 +0200 Subject: [PATCH 15/28] add tests for type --- q2_types/hmmer/__init__.py | 14 ++++++-- q2_types/hmmer/_type.py | 17 ++++----- q2_types/hmmer/tests/test_type.py | 57 ++++++++++++++++++++++++++++--- 3 files changed, 73 insertions(+), 15 deletions(-) diff --git a/q2_types/hmmer/__init__.py b/q2_types/hmmer/__init__.py index df5bee42..5e35863a 100644 --- a/q2_types/hmmer/__init__.py +++ b/q2_types/hmmer/__init__.py @@ -13,10 +13,14 @@ DnaHmmMultipleProfilesDirectoryFormat, RnaHmmMultipleProfilesDirectoryFormat, AminoHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt, - AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat + AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat, + BaseHmmPressedDirFmt ) from ._type import ( - HMM + HMM, + SingleAmino, SingleDNA, SingleRNA, + MultipleAmino, MultipleDNA, MultipleRNA, + MultipleAminoPressed, MultipleDNAPressed, MultipleRNAPressed ) __all__ = [ @@ -26,5 +30,9 @@ "RnaHmmMultipleProfilesDirectoryFormat", "AminoHmmFileFmt", "DnaHmmFileFmt", 
"RnaHmmFileFmt", "AminoHmmDirectoryFormat", "DnaHmmDirectoryFormat", - "RnaHmmDirectoryFormat", "HMM" + "RnaHmmDirectoryFormat", "HMM", + "SingleAmino", "SingleDNA", "SingleRNA", + "MultipleAmino", "MultipleDNA", "MultipleRNA", + "MultipleAminoPressed", "MultipleDNAPressed", "MultipleRNAPressed", + "BaseHmmPressedDirFmt" ] diff --git a/q2_types/hmmer/_type.py b/q2_types/hmmer/_type.py index 163b9be3..1714dacd 100644 --- a/q2_types/hmmer/_type.py +++ b/q2_types/hmmer/_type.py @@ -44,6 +44,15 @@ MultipleRNAPressed = SemanticType( 'MultipleRNAPressed', variant_of=HMM.field['type'] ) + +plugin.register_semantic_types( + HMM, + SingleAmino, SingleDNA, SingleRNA, + MultipleAmino, MultipleDNA, MultipleRNA, + MultipleAminoPressed, MultipleDNAPressed, + MultipleRNAPressed +) + plugin.register_artifact_class( HMM[MultipleAminoPressed], directory_format=BaseHmmPressedDirFmt, @@ -71,14 +80,6 @@ ) ) -plugin.register_semantic_types( - HMM, - SingleAmino, SingleDNA, SingleRNA, - MultipleAmino, MultipleDNA, MultipleRNA, - MultipleAminoPressed, MultipleDNAPressed, - MultipleRNAPressed -) - plugin.register_artifact_class( HMM[SingleAmino], directory_format=AminoHmmDirectoryFormat, diff --git a/q2_types/hmmer/tests/test_type.py b/q2_types/hmmer/tests/test_type.py index f5c4c67d..734aa00c 100644 --- a/q2_types/hmmer/tests/test_type.py +++ b/q2_types/hmmer/tests/test_type.py @@ -6,16 +6,65 @@ # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- from qiime2.plugin.testing import TestPluginBase -from . 
import HMM +from q2_types.hmmer import ( + HMM, BaseHmmPressedDirFmt, + AminoHmmMultipleProfilesDirectoryFormat, + DnaHmmMultipleProfilesDirectoryFormat, + RnaHmmMultipleProfilesDirectoryFormat, + AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat, + SingleAmino, SingleDNA, SingleRNA, + MultipleAmino, MultipleDNA, MultipleRNA, + MultipleAminoPressed, MultipleDNAPressed, MultipleRNAPressed +) class TestHMMType(TestPluginBase): package = 'q2_types.reference_db.tests' def test_hmmer_registration(self): - self.assertRegisteredSemanticType(...) + self.assertRegisteredSemanticType(HMM) - def test_HMMER_semantic_type_registered_to_DirFmt(self): + def test_SingleAmino_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - HMM[...], ... + HMM[SingleAmino], AminoHmmDirectoryFormat + ) + + def test_SingleDNA_semantic_type_registered_to_DirFmt(self): + self.assertSemanticTypeRegisteredToFormat( + HMM[SingleDNA], DnaHmmDirectoryFormat + ) + + def test_SingleRNA_semantic_type_registered_to_DirFmt(self): + self.assertSemanticTypeRegisteredToFormat( + HMM[SingleRNA], RnaHmmDirectoryFormat + ) + + def test_MultipleAmino_semantic_type_registered_to_DirFmt(self): + self.assertSemanticTypeRegisteredToFormat( + HMM[MultipleAmino], AminoHmmMultipleProfilesDirectoryFormat + ) + + def test_MultipleDNA_semantic_type_registered_to_DirFmt(self): + self.assertSemanticTypeRegisteredToFormat( + HMM[MultipleDNA], DnaHmmMultipleProfilesDirectoryFormat + ) + + def test_MultipleRNA_semantic_type_registered_to_DirFmt(self): + self.assertSemanticTypeRegisteredToFormat( + HMM[MultipleRNA], RnaHmmMultipleProfilesDirectoryFormat + ) + + def test_MultipleAminoPressed_semantic_type_registered_to_DirFmt(self): + self.assertSemanticTypeRegisteredToFormat( + HMM[MultipleAminoPressed], BaseHmmPressedDirFmt + ) + + def test_MultipleDNAPressed_semantic_type_registered_to_DirFmt(self): + self.assertSemanticTypeRegisteredToFormat( + HMM[MultipleDNAPressed], 
BaseHmmPressedDirFmt + ) + + def test_MultipleRNAPressed_semantic_type_registered_to_DirFmt(self): + self.assertSemanticTypeRegisteredToFormat( + HMM[MultipleRNAPressed], BaseHmmPressedDirFmt ) From 890f0d7d77ae78decaf673c4933a1621f0119a63 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Mon, 3 Jun 2024 17:11:07 +0200 Subject: [PATCH 16/28] test the fomrats. tbc --- q2_types/hmmer/_format.py | 14 +- q2_types/hmmer/tests/__init__.py | 7 + .../{hmmer => }/bacteria/bacteria.hmm.h3f | 0 .../{hmmer => }/bacteria/bacteria.hmm.h3i | 0 .../{hmmer => }/bacteria/bacteria.hmm.h3m | 0 .../{hmmer => }/bacteria/bacteria.hmm.h3p | 0 .../{hmmer => }/bacteria/bacteria.hmm.idmap | 0 q2_types/hmmer/tests/data/hmmer/bacteria/a.fa | 6 - q2_types/hmmer/tests/data/hmmer/bacteria/b.fa | 12 - .../hmmer/tests/data/hmmer/bacteria/b2.fa | 10 - q2_types/hmmer/tests/data/hmms/amino.hmm | 37 +++ q2_types/hmmer/tests/data/hmms/dna.hmm | 265 ++++++++++++++++++ .../{hmmer => }/invalid_idmaps/1.hmm.idmap | 0 .../{hmmer => }/invalid_idmaps/2.hmm.idmap | 0 .../{hmmer => }/invalid_idmaps/3.hmm.idmap | 0 .../{hmmer => }/invalid_idmaps/4.hmm.idmap | 0 q2_types/hmmer/tests/test_format.py | 75 +++-- 17 files changed, 362 insertions(+), 64 deletions(-) create mode 100644 q2_types/hmmer/tests/__init__.py rename q2_types/hmmer/tests/data/{hmmer => }/bacteria/bacteria.hmm.h3f (100%) rename q2_types/hmmer/tests/data/{hmmer => }/bacteria/bacteria.hmm.h3i (100%) rename q2_types/hmmer/tests/data/{hmmer => }/bacteria/bacteria.hmm.h3m (100%) rename q2_types/hmmer/tests/data/{hmmer => }/bacteria/bacteria.hmm.h3p (100%) rename q2_types/hmmer/tests/data/{hmmer => }/bacteria/bacteria.hmm.idmap (100%) delete mode 100644 q2_types/hmmer/tests/data/hmmer/bacteria/a.fa delete mode 100644 q2_types/hmmer/tests/data/hmmer/bacteria/b.fa delete mode 100644 q2_types/hmmer/tests/data/hmmer/bacteria/b2.fa create mode 100644 q2_types/hmmer/tests/data/hmms/amino.hmm create mode 100644 q2_types/hmmer/tests/data/hmms/dna.hmm 
rename q2_types/hmmer/tests/data/{hmmer => }/invalid_idmaps/1.hmm.idmap (100%) rename q2_types/hmmer/tests/data/{hmmer => }/invalid_idmaps/2.hmm.idmap (100%) rename q2_types/hmmer/tests/data/{hmmer => }/invalid_idmaps/3.hmm.idmap (100%) rename q2_types/hmmer/tests/data/{hmmer => }/invalid_idmaps/4.hmm.idmap (100%) diff --git a/q2_types/hmmer/_format.py b/q2_types/hmmer/_format.py index 8529ad0c..7eb26c55 100644 --- a/q2_types/hmmer/_format.py +++ b/q2_types/hmmer/_format.py @@ -88,7 +88,7 @@ def _validate_file_fmt( for hmm_profile in hmm_profiles[:parse_n_profiles]: hmm_profile.validate(tolerance=tolerance) - if hmm_profile.alphabet.lower() != alphabet: + if hmm_profile.alphabet.type.lower() != alphabet: raise ValidationError( "Found profile with alphabet: " f"{hmm_profile.alph.lower()}\n" @@ -100,21 +100,21 @@ class AminoHmmFileFmt(HmmBaseFileFmt): alphabet = "amino" def _validate_(self, level): - self._validate_file_fmt(self, level, self.alphabet, True) + self._validate_file_fmt(level, self.alphabet, True) class DnaHmmFileFmt(HmmBaseFileFmt): alphabet = "dna" def _validate_(self, level): - self._validate_file_fmt(self, level, self.alphabet, True) + self._validate_file_fmt(level, self.alphabet, True) class RnaHmmFileFmt(HmmBaseFileFmt): alphabet = "rna" def _validate_(self, level): - self._validate_file_fmt(self, level, self.alphabet, True) + self._validate_file_fmt(level, self.alphabet, True) AminoHmmDirectoryFormat = model.SingleFileDirectoryFormat( @@ -129,17 +129,17 @@ def _validate_(self, level): class AminoHmmMultipleProfilesFileFmt(AminoHmmFileFmt): def _validate_(self, level): - self._validate_file_fmt(self, level, self.alphabet, False) + self._validate_file_fmt(level, self.alphabet, False) class DnaHmmMultipleProfilesFileFmt(DnaHmmFileFmt): def _validate_(self, level): - self._validate_file_fmt(self, level, self.alphabet, False) + self._validate_file_fmt(level, self.alphabet, False) class RnaHmmMultipleProfilesFileFmt(RnaHmmFileFmt): def _validate_(self, 
level): - self._validate_file_fmt(self, level, self.alphabet, False) + self._validate_file_fmt(level, self.alphabet, False) AminoHmmMultipleProfilesDirectoryFormat = model.SingleFileDirectoryFormat( diff --git a/q2_types/hmmer/tests/__init__.py b/q2_types/hmmer/tests/__init__.py new file mode 100644 index 00000000..afcc05c2 --- /dev/null +++ b/q2_types/hmmer/tests/__init__.py @@ -0,0 +1,7 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- diff --git a/q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3f b/q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3f similarity index 100% rename from q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3f rename to q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3f diff --git a/q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3i b/q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3i similarity index 100% rename from q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3i rename to q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3i diff --git a/q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3m b/q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3m similarity index 100% rename from q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3m rename to q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3m diff --git a/q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3p b/q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3p similarity index 100% rename from q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.h3p rename to q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3p diff --git a/q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.idmap b/q2_types/hmmer/tests/data/bacteria/bacteria.hmm.idmap 
similarity index 100% rename from q2_types/hmmer/tests/data/hmmer/bacteria/bacteria.hmm.idmap rename to q2_types/hmmer/tests/data/bacteria/bacteria.hmm.idmap diff --git a/q2_types/hmmer/tests/data/hmmer/bacteria/a.fa b/q2_types/hmmer/tests/data/hmmer/bacteria/a.fa deleted file mode 100644 index 5abab3f7..00000000 --- a/q2_types/hmmer/tests/data/hmmer/bacteria/a.fa +++ /dev/null @@ -1,6 +0,0 @@ ->234831.PSM_A0151 -MKLLYWLDEWLTLSDNEKQAKLPTSGGDLLGDVYVKYHFVDLNNPLLFTFSPAGTDVKERDLNEDFAPWGYHLAQKQNVNIIAFQHLGKSNWFRNRNLIFFIEQLSTLLSPFETKLGYGLSRGGFAVGAFAKLLKLDKVLLFHPVSTKNKLIAPWDDRSSTDIAQQYDWQGDYHDLDLGDAQGYIIYDPTNCIDRQHAKRYKQLTHLRVFGMGHGTHATYLNKFGFYKQVAIDFIANQQIDIAQFRLQTKTLRLKEDYYKKLNKANANSPHRQALLSTAHTILIDEKAAHVQEHQEKIDIQPLIDVAIKHQDENPNDAIKLLEVAQQLAPDDPLVEHKLRQLE ->225849.swp_4415 -MLSPFSIYLEQIQEQLALLLFNQTVRLQVDDVVIQYHIFDTAQPLMITFPPGSEAFSDSDLIENKTPWGYDFFAKRRMNVISFNHIGKGNYFTSNELVIFTEKLGKHLDCFCERIGYGVSRGGFATSMFSKNLRLDRALLLMPISTYDISIASWDPKVREAAQHLNASPDSADCDIPLTIIYDPLYKPDSMHMKRFQSCRVRFPLPGVGHRIPRALLQLGILKSTILQYRQQQIDPASFFLKIRKRRTLSFFYRGLQSCNNTSRFSLRSRVILFHRIQYHINHLDIDPKKIYQQLSESIQKRCFYTTERIFGHGYKNVAGLSALVLC ->87626.PTD2_14957 -MKLLYWLDEWLTLSRDEQQARLPMRGDDLLDDVFVKYDFVDLDKPLLFTFSPAGTNVQEQDLHSDFAPWGYKLGKKQNVNIISFQHLGKSNWFRSRNLIFFLEQLSPLLEPFNQRLGYGLSRGGFAVGAFANLLKLDQVLLFHPVSTKNKQIATWDDRSSTDIAQQFDWQGDYHDLDLGHAKGYIIYDPTNHIDRMHAKRYQQLTHLRVFGMGHGTHATYLNKFGFYKQVAVDFIGNQQIEIAQFRQQTKTLRFKEDYYKRLNRANKNSAHRLGLLSKAHNIVIGEKEEHVQEHQAQIDIQPLIDVALKYQDKHPEDAIELLKVAQQLAPDDPLVEHKLKQLE \ No newline at end of file diff --git a/q2_types/hmmer/tests/data/hmmer/bacteria/b.fa b/q2_types/hmmer/tests/data/hmmer/bacteria/b.fa deleted file mode 100644 index bc655eca..00000000 --- a/q2_types/hmmer/tests/data/hmmer/bacteria/b.fa +++ /dev/null @@ -1,12 +0,0 @@ ->1268239.PALB_13220 
-MKLLYWLDDWLTASRDEQQARLPMMGDDLLDDVFVKYHFVDINKPLLFTFSPAGTNVQEHDLHEDFAPWGYRLAQKQGVNIIAFQHLGKSNWFRSRNLIFFLEQLATLLTPFERRLGYGLSRGGFGVGAFANLLGLDEVLLFHPVSTKNKDKVPWDTRSSTDIAQKFDWRGDFHDVDLGHAKGYIIYDPTNPIDRLHAKRYGQLTHLRVFGMGHGTHATYLNKFGFYKQVAVDFIRHQVIDIAQFRQQTKTLRFKEDYYKRLKKANAQSSHRKGLLKKAHQILKDEKQEHVQEHQAQIDIQPLIDIAMKHQEKHPEDALQLLEVAQQLAPDDPLVEHKIKQLGE ->1116375.VEJY3_16241 -MKLLYWLDEWLTLSRKEQETRLPISGEDLLDDVFVKYEFVDLNKPLLFTFSPAGTNLQVQDLHPDFAPWGYRLAQKQKVNIISFQHLGKSNWFRSRNLIFFLEQLSTLLAPFECRLGYGLSRGGFAIGAFANLLKLDQVLLFHPVSTKNQALVPWDNRSSTEIAQQFDWDGDYHDLDLGDARGYIIYDPTNDIDRLHAKRYPELTHLRVYGMGHGTHATYMNKFGFYKQVAADFIRHQQIDIAQFRHQTKTLRLKEDYYHCLNKANASSQHRLNLLSTAHNVLIDEKKEHVKEHQAQIDIQPLVDIALKHEHDNPQDTVQLLEVAQQLVPGDPLVEHKLQQLT ->1307437.J139_15221 -MKLLYWLDEWLTLCRDEQQTKLPMCGGDLLGDVYVKYDFVDLNKPLLFTFSPAGTNVQEHDLTDDFAPWGYHLAQKQNVNVISFQHLGKSNWFRSRNLIFFLEQLSSLLTPFKCRLGYGLSRGGFAVGAFAKLLKLDQVLFFHPVSTKNTETVPWDTRSSTELAQQFDWQSEYNDLDLGHAKGYIIYDPTNKIDRLHAKRYPQLTHLRVFGMGHGTHASYLTKFGFYKQVAVDFIRHQQIDIAQFRLQTKTLRLKEEYYQSLNKANASSPHRLALLSTAHQILADEKEVHVQEHQAKIDIQPLIDVALKHQDEHPNDAIQLLEVAQQIVPDDPLVEHKLKQLE ->1328313.DS2_04565 -MKLLYWLDDWLSQTPEQQQTSLPFAGSDLLGDVFVKYHFIDTNKPLLFTFSPAGTNLQEQDLHEDFNPWGYKLARSQQVNIISFQHLGRSNWFRSRNLIFFIEQLAELLGPFKCRLGYGLSRGGFGVGAFANLLKLDQVLLFHPVSTKNKAKVPWDQRSSTDIAQKFDWLGDYHDVDLGHAKGYIIYDPTNPTDRQHAKRYPQLNHLRVYGMGHGTHATYLTKFGFYKQVAVDFIANQQIDVAAFRQQTRTLRFKEDYYKKLNKANAQSAHRLSLLSKAHQILLEEKAQHIQDHQAQIDVQPLVDIALKHEQDNPQDAIQLLEVAQQLSPEDPLIDHKLKQLK ->1333507.AUTQ01000270_gene2952 -MKLLYWLDEWLTLPRNEQQTRLPMTGSDLLGDVFVKYDFVDVNKPLLFTFSPAGTNVQEQDLHPDFAPWGYHLAQKQNVNVIAFQHLGKSNWFRNRNLIFFLEQLSTLLTPFNCRLGYGLSRGGFAVGAFAKLLKLNQVLFFHPVSTKNKELVSWDDRSSTDIAQQFDWQQDYHDLDLGDAQGYIIYDPTNRIDRMHAKRYKQLTHLRVFGMGHGTHATYLNKFGFYKQVAVDFIQHQQIDIAQFRLQTKTLRFKEDYYKRLNKANTNSVHRKELLSKAHNILIDEKKVHIQEHQKKIDIQPLIDVALKHQDEHPADAIQLLEVAQKLVPGDPLIEHKLKQLE ->1336233.JAEH01000031_gene235 
-MLTLHEDVRIDSCDMSYQYHIVDLSKPVVLCFAPGNSGTDRMDMQQNLWGFDYLKSRKMNVLSITHNGQQNFYQSQACMDIFNALGECLAVFPERIAYGSSRGCFAIGLHAKRLGLDRALMMMPISSMNAELAPQEPKVKQYGAHPNWQGPHNDAAICDIPLTVICDSLYPADHHHYRRFSNVVQFLRLPGVGHRVPSVLNKMGMLSKVVIDYLHNEIDTQAFYKEARKRRQLNVYYRQLLRDPTGKLTTKRKFILRKHQTHVAVSNLSQQLSAKGSAKASAAKQWLIAKKPNLSLIK \ No newline at end of file diff --git a/q2_types/hmmer/tests/data/hmmer/bacteria/b2.fa b/q2_types/hmmer/tests/data/hmmer/bacteria/b2.fa deleted file mode 100644 index c354a353..00000000 --- a/q2_types/hmmer/tests/data/hmmer/bacteria/b2.fa +++ /dev/null @@ -1,10 +0,0 @@ ->722419.PH505_aa01730 -MKLLYWLDEWLTLPRNEQQTRLPMTGSDLLGDVFVKYDFVDVNKPLLFTFSPAGTNVQEQDLHPDFAPWGYHLAQKQNVNVIAFQHLGKSNWFRNRNLIFFLEQLSTLLTPFNCRLGYGLSRGGFAVGAFAKLLKLNQVLFFHPVSTKNKELVPWDDRSSTDIAQQFDWQQDYHDLDLGDAQGYIIYDPTNRIDRMHAKRYKQLTHLRVFGMGHGTHATYLNKFGFYKQVAVDFIQHQQIDIAQFRMQTKTLRFKEDYYKSLNKANVNSAHRQTLLSKAHNILIDEKEVHVQEHQAKIDIQPLIDVALKHQDEHPNDAIQLLEVAQQLVPDDPLIEHKIKQLEE ->425104.Ssed_4030 -MLKFNQDVRQRIGNVIMLYRLVDVNAPVVITFPPADHGVAESEAWSCTPWGFDFLTSQKINTISFADIGEHFYYHSAEFVNFIELLAQELVIFPQRLGYGVSKGGFGVSLHADRLGLDRALLMMPLSTFNDKKAPWDSAAIRASKAVDCSSPLNDSCRCQTPLTIIFDPLNPRDRRQAVRFRSTSVSLKLPGVGHRIPRALQELGLLKKLVLDFIHNRLDTDAFPGQVRKRRTLSVYYRNLLSNPTQKLTFKRKIVLYYHKLNLQLANIEDEPARILCRIKQSLRKRKYLVEKCHIQLQHVIAERQLALCTAMVFCL ->316275.VSAL_II0711 -MKLLYWLDEWLTHSRSEQQAQLPMSGGDLLDDVFVKYEFVDLDKPLLFTFSPAGTNVQEQDINADFAPWGYHLAQKQQVNIISFQHLGKSNWFRNRNLIFFLEQLTTLLDPFTYRLGYGLSRGGFAVGAFANLLELDQVLFFHPVSTKNQEIAPWDDRSSTELAQKFDWLGDYHDLNLGKAKGYIIYDPTNRIDRLHAKRYPELTHLRVFGMGHGTHSTYLNKFGFYKQVAVDFIRHQKIDIAQFRQQTKTLRFKEDYYQRLNKANSSSEHRLGLLSKAHNILIDEKEAHVQEHQAQLDVQPLIDIALKHQDEHPQDAIQLLEMALKLVPDDPLVERKLKQLI ->312309.VF_1348 
-MKLLYWLDEWLTNSRSQQQARLPMTGSDLLDDVFVKYEFVDLDKPLLFTFSPAGTNLKEQDLHEDYAPWGYHLARKQDVNVISFQHLGQSNWFRSRNLIFFLEQLSTLLEPFTYRLGYGLSRGGFAIGAFANLLQLDQVLLFHPVSTKNQNIAPWDDRSSTEIAQKFDWEGDYHDLDLGKAKGYIIYDPTNRIDRLHAKRYPELTHLRVFGMGHGTHATYLNKFGFYKQVAVDFMRHQKVDIAQFRQQTKTLRFKEDYYKRLNKANSSSEHRLGLLSKAHNIVIGEKEAHVQEHQAQIDVQPLIDIALKHKDEHPKDAIQLLEMAQLLVPDDPLVEHKLKQLA ->1454202.PPBDW_90566___1 -MKLLYWLDEWLTHSRSQQQAQLPMSGGDLLDDVFVKYEFVDVNKPLLFTFSPAGTNVQEQDLDDDFAPWGYHLARKQQVNVISFQHLGKSNWFRSRNLIFFLEQLTTLLEPFNYRLGYGLSRGGFAVGAFANLLQLDQVLLFHPVSTKNKSIAPWDDRSSTALAQQFDWEGDYHDLNLGKAKGYIIYDPTNNIDRLHAKRYPELTHLRVFGMGHGTHATYLNKFGFYKQVAVDFIRYQKIDIAQFRQQTKTLRFKEDYYQSLNKANAYSEHRLGLLSTAHNILIDEKEAHVQEHQAQIDVQPLIDIAIKHQDDYPQDAIQLLEMAQQLVPNDPLVEHKLRQLL \ No newline at end of file diff --git a/q2_types/hmmer/tests/data/hmms/amino.hmm b/q2_types/hmmer/tests/data/hmms/amino.hmm new file mode 100644 index 00000000..832a1288 --- /dev/null +++ b/q2_types/hmmer/tests/data/hmms/amino.hmm @@ -0,0 +1,37 @@ +HMMER3/f [3.1b2 | February 2015] +NAME COG1413.faa.final_tree.fa +LENG 5 +ALPH amino +RF no +MM no +CONS yes +CS no +MAP yes +DATE Fri Sep 28 16:11:42 2018 +NSEQ 9184 +EFFN 9184.000000 +CKSUM 4188890012 +STATS LOCAL MSV -7.8758 1.53292 +STATS LOCAL VITERBI -9.2150 1.53292 +STATS LOCAL FORWARD -1.5202 1.53292 +HMM A C D E F G H I K L M N P Q R S T V W Y + m->m m->i m->d i->m i->i d->m d->d + COMPO 2.15961 4.83096 2.77614 2.66643 3.50057 2.79412 3.93266 3.01265 3.03101 2.13764 4.11997 3.37590 2.99384 3.18466 2.69554 2.85142 3.03638 2.77312 4.20758 3.64868 + 2.22461 4.68737 2.78288 2.68609 3.36090 2.76898 3.86143 3.03984 3.09492 2.14663 4.01338 3.37541 2.96650 3.17989 2.72654 2.84544 3.01045 2.78385 4.15025 3.55881 + 8.73029 0.00239 6.10854 5.69023 0.00338 0.00000 * + 1 1.66767 4.71954 3.08045 2.87247 3.65571 2.26664 3.96771 3.19781 3.23087 2.26570 4.51338 3.75729 3.42258 3.36819 2.77826 2.88913 2.97233 2.65924 4.72973 4.00800 5552 a - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 
4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.00571 12.65184 5.16945 0.61958 0.77255 0.04657 3.09005 + 2 2.31156 5.04536 3.24595 3.10731 3.13564 3.46905 4.11287 2.66796 3.74456 1.11103 3.61586 3.81646 3.21608 3.67606 3.55457 3.32566 3.44135 2.79225 4.73964 4.09590 5553 l - - - + 2.11512 4.85437 2.73686 2.66830 3.57135 2.79738 3.88428 3.06039 3.14725 2.13746 4.16816 3.44640 2.97082 3.19489 2.64650 2.85141 3.05782 2.74390 4.18075 3.68214 + 7.05252 0.00117 8.11295 4.96035 0.00704 4.03374 0.01787 + 3 1.09776 4.41072 3.43804 3.12910 3.67283 3.24355 4.26962 3.06573 3.50913 2.39573 4.44403 3.54797 3.12238 3.85454 3.00890 3.10175 3.29924 2.61831 4.53614 4.45233 7323 a - - - + 2.51762 4.44879 2.78132 2.80296 3.26580 2.56005 3.57238 3.18520 2.78218 2.48137 4.08040 2.89413 2.89714 3.32910 2.94074 2.49537 2.81032 2.78129 4.68396 3.70182 + 0.66765 3.64611 0.77437 2.25260 0.11107 2.24263 0.11225 + 4 1.27089 4.11207 4.48177 3.67245 3.38524 4.31027 4.80205 2.11097 4.37564 1.49386 3.77536 5.01647 5.09441 4.35276 4.24417 4.13032 3.83956 2.05986 5.86496 3.82566 7345 a - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.00001 11.96592 12.68827 0.61958 0.77255 0.01113 4.50333 + 5 2.27563 4.48763 2.63898 2.21860 3.77175 3.23037 4.08698 3.08710 3.06519 2.13574 4.28409 3.30047 3.45142 3.04825 2.61723 2.79292 2.97656 2.96078 3.65679 3.75327 7346 l - - - + 2.14140 4.92283 2.78072 2.65305 3.57236 2.80660 3.99033 2.98567 2.96282 2.13610 4.17486 3.35504 3.01407 3.18202 2.68913 2.85327 3.04458 2.77653 4.24923 3.69308 + 9.23040 0.00010 * 6.21894 0.00199 0.00000 * +// diff --git a/q2_types/hmmer/tests/data/hmms/dna.hmm b/q2_types/hmmer/tests/data/hmms/dna.hmm new file mode 100644 index 00000000..99341b3f --- /dev/null +++ b/q2_types/hmmer/tests/data/hmms/dna.hmm @@ -0,0 +1,265 @@ +HMMER3/f [3.1 | February 2013] +NAME MADE1 +ACC DF0000629.2 
+DESC MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon +LENG 80 +MAXL 426 +ALPH DNA +RF yes +MM no +CONS yes +CS no +MAP yes +DATE Tue Feb 19 20:33:41 2013 +NSEQ 1997 +EFFN 3.911818 +CKSUM 3015610723 +STATS LOCAL MSV -8.5786 0.71858 +STATS LOCAL VITERBI -9.3632 0.71858 +STATS LOCAL FORWARD -3.4823 0.71858 +HMM A C G T + m->m m->i m->d i->m i->i d->m d->d + COMPO 1.24257 1.59430 1.62906 1.16413 + 1.38629 1.38629 1.38629 1.38629 + 0.03960 3.94183 3.94183 1.46634 0.26236 0.00000 * + 1 2.69765 2.44396 2.81521 0.24089 1 t x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03960 3.94183 3.94183 1.46634 0.26236 1.09861 0.40547 + 2 2.72939 2.37873 2.85832 0.24244 2 t x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03725 4.00179 4.00179 1.46634 0.26236 1.09861 0.40547 + 3 0.16099 3.16370 2.87328 2.99734 3 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03604 4.03416 4.03416 1.46634 0.26236 1.09861 0.40547 + 4 1.98862 2.42132 0.42649 2.10770 4 g x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03539 4.05203 4.05203 1.46634 0.26236 1.09861 0.40547 + 5 1.96369 2.69532 0.36534 2.32099 5 g x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03764 4.06427 3.92372 1.46634 0.26236 1.09861 0.40547 + 6 2.56994 2.11239 2.71946 0.30571 6 t x - - + 1.37159 1.41129 1.39124 1.37159 + 0.03806 3.89715 4.07214 1.50442 0.25122 1.00714 0.45454 + 7 2.58388 2.10353 2.64646 0.31253 12 t x - - + 1.38764 1.38524 1.38764 1.38465 + 0.03494 4.03864 4.09125 1.40070 0.28293 1.09237 0.40860 + 8 2.18552 2.70201 0.28821 2.64645 14 g x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03628 4.09157 3.96779 1.46634 0.26236 1.09861 0.40547 + 9 2.16916 2.82142 0.28427 2.60854 15 g x - - + 1.38091 1.39033 1.38365 1.39033 + 0.03566 4.00237 4.08886 1.38021 0.28972 1.01958 0.44745 + 10 2.45517 2.15232 2.42886 0.34277 18 t x - - + 1.39065 1.39065 1.39065 1.37335 + 0.03536 4.01212 4.09576 1.39554 0.28462 1.09775 0.40589 + 11 2.10260 2.95484 0.28160 2.64222 21 g x - - + 1.36740 1.40555 1.40555 1.36740 + 0.03843 3.92069 4.02468 
1.44733 0.26814 1.09856 0.40549 + 12 2.54740 0.30185 2.61355 2.21647 26 c x - - + 1.38748 1.38276 1.38748 1.38748 + 0.03457 4.05446 4.09623 1.40847 0.28040 1.05496 0.42803 + 13 0.28443 2.72003 2.32214 2.48149 28 a x - - + 1.38740 1.38740 1.38298 1.38740 + 0.03441 4.05976 4.10001 1.41198 0.27926 1.09780 0.40587 + 14 0.29412 2.55413 2.49679 2.35701 30 a x - - + 1.38194 1.39067 1.38194 1.39067 + 0.03505 4.02482 4.10005 1.39522 0.28473 1.09929 0.40512 + 15 0.18837 2.99710 2.82270 2.77556 33 a x - - + 1.39015 1.39472 1.37503 1.38539 + 0.03725 3.97815 4.02618 1.37955 0.28994 1.10102 0.40426 + 16 0.50816 2.05151 2.22111 1.82407 37 a x - - + 1.36727 1.38730 1.39683 1.39405 + 0.04830 3.89881 3.61610 1.29026 0.32186 1.05306 0.42905 + 17 2.11260 2.73141 0.29747 2.64152 41 g x - - + 1.36913 1.40376 1.40376 1.36913 + 0.03705 3.93681 4.08299 1.44872 0.26771 1.07479 0.41759 + 18 2.24459 1.90539 2.34054 0.43234 46 t x - - + 1.33632 1.42493 1.39937 1.38665 + 0.04427 3.64574 4.06297 1.70501 0.20061 1.21309 0.35279 + 19 0.44322 2.17202 2.18055 2.03175 57 a x - - + 1.41047 1.41471 1.36338 1.35797 + 0.03970 3.81957 4.07540 1.65588 0.21186 1.22788 0.34660 + 20 0.33340 2.42691 2.40824 2.25160 66 a x - - + 1.29389 1.44615 1.37917 1.43324 + 0.04223 3.70146 4.09459 1.55158 0.23815 1.05880 0.42598 + 21 2.50563 1.98543 2.69601 0.33746 74 t x - - + 1.39462 1.39462 1.42862 1.32990 + 0.04184 3.80216 3.98177 1.80466 0.17976 1.00279 0.45705 + 22 2.54484 1.97505 2.66483 0.33806 84 t x - - + 1.39134 1.39489 1.38662 1.37246 + 0.03877 3.97504 3.95038 1.37620 0.29107 1.13932 0.38572 + 23 2.10159 2.83856 0.29282 2.61635 88 g x - - + 1.39682 1.39682 1.35536 1.39682 + 0.05046 3.75402 3.65808 1.08330 0.41321 1.13019 0.39004 + 24 2.25298 0.61854 2.50691 1.29221 90 c x - - + 1.35803 1.49605 1.46737 1.24379 + 0.06091 3.28322 3.83564 1.89752 0.16245 1.28788 0.32276 + 25 1.27819 2.23285 0.76242 1.91259 106 g x - - + 1.29024 1.67349 1.68279 1.04597 + 0.05752 3.44263 3.73311 2.58671 0.07825 1.26818 0.33037 + 26 
1.86925 2.58352 0.39466 2.33986 131 g x - - + 1.31084 1.49412 1.46666 1.29002 + 0.04698 3.54257 4.07715 2.25245 0.11109 0.86163 0.54900 + 27 2.38297 1.93394 2.39162 0.39800 151 t x - - + 1.33582 1.47359 1.44163 1.30411 + 0.04951 3.48445 4.03783 2.15951 0.12260 1.21681 0.35122 + 28 2.41717 2.17810 2.62774 0.32113 170 t x - - + 1.36805 1.48060 1.37439 1.32840 + 0.04849 3.50958 4.05014 2.58370 0.07850 1.22399 0.34822 + 29 2.57764 2.35132 2.56552 0.28512 194 t x - - + 1.43829 1.43458 1.24787 1.43829 + 0.04667 3.56670 4.05428 2.49706 0.08591 1.23744 0.34267 + 30 2.47248 2.07688 2.62257 0.33172 215 t x - - + 1.25120 1.52623 1.70635 1.15531 + 0.08932 3.31524 3.01336 2.81842 0.06156 1.22909 0.34610 + 31 2.25937 2.13157 2.02027 0.43957 248 t x - - + 1.18172 1.43522 1.72841 1.28150 + 0.07936 2.93117 3.77395 2.46269 0.08906 0.60457 0.79034 + 32 2.04508 2.84981 0.30490 2.58263 280 g x - - + 1.17665 1.66785 1.66218 1.16056 + 0.05998 3.23615 3.96853 2.83684 0.06040 1.01952 0.44749 + 33 2.45103 0.38098 2.56776 1.87147 317 c x - - + 1.24153 1.52524 1.60663 1.22783 + 0.05538 3.39046 3.90294 2.73920 0.06680 1.18729 0.36391 + 34 2.22082 0.36258 2.75077 2.02704 347 c x - - + 1.15008 1.62014 1.86511 1.10673 + 0.06086 3.18178 4.04341 2.94504 0.05403 1.25991 0.33363 + 35 0.27033 2.66664 2.52541 2.43767 388 a x - - + 1.24951 1.47565 1.41392 1.42074 + 0.07123 3.00373 3.95552 3.13655 0.04440 1.28173 0.32512 + 36 2.83107 2.41670 2.97197 0.22235 439 t x - - + 1.37071 1.57683 1.38637 1.23972 + 0.05293 3.45216 3.91807 2.54402 0.08181 1.14651 0.38235 + 37 2.52322 2.25084 2.45909 0.31611 465 t x - - + 1.26335 1.55077 1.59008 1.19965 + 0.07504 3.13329 3.55006 3.08962 0.04659 1.13108 0.38962 + 38 0.45807 2.30687 1.98940 2.03143 512 a x - - + 1.15472 1.67511 1.53797 1.26320 + 0.09820 3.13076 2.99876 2.79197 0.06326 1.39915 0.28343 + 39 2.37471 0.42180 2.44763 1.80427 550 c x - - + 1.23785 1.49058 1.48364 1.35502 + 0.06081 3.19472 4.01643 2.41851 0.09327 0.94671 0.49105 + 40 2.32826 1.95481 2.36781 
0.40458 578 t x - - + 1.36586 1.46001 1.43000 1.29720 + 0.05257 3.39673 4.03256 1.84862 0.17133 1.40979 0.27997 + 41 2.68669 2.13935 2.81520 0.28200 592 t x - - + 1.34965 1.42793 1.45781 1.31633 + 0.04735 3.57826 3.99988 2.09424 0.13144 1.22129 0.34934 + 42 2.55904 2.16444 2.70859 0.29952 609 t x - - + 1.12072 1.61936 1.63578 1.26895 + 0.07346 3.25910 3.42962 2.85641 0.05919 1.38363 0.28857 + 43 1.99923 1.61027 2.26343 0.57851 646 t x - - + 1.32290 1.58747 1.61095 1.11018 + 0.06656 3.08568 3.97944 2.44774 0.09046 0.75593 0.63407 + 44 0.23887 2.79899 2.55209 2.60783 675 a x - - + 1.18557 1.50323 1.59070 1.31590 + 0.05597 3.38637 3.88222 2.46900 0.08847 1.27945 0.32599 + 45 0.29593 2.53488 2.53903 2.32335 701 a x - - + 1.08710 1.54222 1.59276 1.40430 + 0.07539 2.94521 3.91062 1.91623 0.15918 1.22327 0.34852 + 46 2.58352 2.40524 2.76700 0.25955 725 t x - - + 1.19685 1.58503 1.74852 1.14293 + 0.06124 3.18279 4.02089 2.82961 0.06085 1.05474 0.42814 + 47 2.13251 2.88788 0.29508 2.50964 764 g x - - + 1.20891 1.55463 1.68206 1.19000 + 0.06526 3.12574 3.94910 2.41448 0.09367 1.10396 0.40280 + 48 2.23841 2.99164 0.25118 2.72900 792 g x - - + 1.26330 1.55339 1.52606 1.24355 + 0.05464 3.34968 4.01313 2.78872 0.06347 1.15133 0.38012 + 49 2.57533 0.32900 2.64632 2.01501 824 c x - - + 1.35118 1.39828 1.40141 1.39516 + 0.04340 3.79297 3.91506 1.59549 0.22666 1.20075 0.35806 + 50 0.46433 2.04127 2.23437 2.00605 833 a x - - + 1.23062 1.36903 1.62282 1.36182 + 0.05764 3.31530 3.92762 2.28791 0.10700 1.07910 0.41536 + 51 0.27513 2.77017 2.28518 2.57549 853 a x - - + 1.27958 1.58726 1.46109 1.25394 + 0.05750 3.30072 3.96214 2.60776 0.07656 1.25708 0.33475 + 52 0.20149 2.86434 2.84551 2.69770 883 a x - - + 1.23645 1.62259 1.71174 1.10368 + 0.05756 3.26729 4.02702 2.54508 0.08172 1.27391 0.32814 + 53 0.26982 2.65833 2.50477 2.46835 911 a x - - + 1.36005 1.50358 1.48100 1.22550 + 0.06921 3.37553 3.42118 2.36646 0.09851 1.27560 0.32748 + 54 0.40022 2.19284 2.22687 2.20396 934 a x - - + 
1.12070 1.60472 1.53213 1.35895 + 0.05523 3.36752 3.94966 2.42917 0.09224 0.84774 0.55928 + 55 2.11356 0.46400 2.46442 1.79955 960 c x - - + 1.23932 1.35913 1.50478 1.46331 + 0.05187 3.47055 3.94022 2.35854 0.09933 1.12102 0.39445 + 56 1.85868 0.79440 2.22069 1.25971 983 c x - - + 1.21951 1.50212 1.51138 1.34185 + 0.06404 3.29054 3.69705 1.75742 0.18933 1.18410 0.36532 + 57 1.33272 2.32720 0.71452 1.90215 999 g x - - + 1.12229 1.49343 1.56653 1.42255 + 0.04920 3.46654 4.08749 2.17995 0.11996 1.31769 0.31164 + 58 2.48337 0.43652 2.46331 1.68683 1017 c x - - + 1.34704 1.55461 1.38112 1.28222 + 0.04823 3.61532 3.90311 2.20911 0.11631 1.00864 0.45368 + 59 0.41659 2.44509 1.93972 2.20507 1034 a x - - + 1.38198 1.38198 1.39194 1.38932 + 0.03641 3.98130 4.06929 1.35873 0.29704 1.31330 0.31325 + 60 0.41612 2.39160 1.97116 2.21075 1037 a x - - + 1.03649 1.46430 1.57421 1.57557 + 0.04769 3.52580 4.06641 2.32461 0.10294 0.84329 0.56263 + 61 2.66264 2.12302 2.82746 0.28581 1056 t x - - + 1.36925 1.39635 1.38930 1.39048 + 0.04097 3.97400 3.84718 1.39433 0.28502 1.12205 0.39395 + 62 2.26510 2.13196 2.42551 0.37231 1060 t x - - + 1.37965 1.39147 1.39147 1.38264 + 0.04082 3.91610 3.90805 1.24613 0.33914 0.95192 0.48776 + 63 0.41244 2.25761 2.16787 2.12907 1062 a x - - + 1.34515 1.41203 1.41203 1.37753 + 0.04054 3.77835 4.08203 1.30483 0.31638 1.11819 0.39582 + 64 2.51464 0.37905 2.62296 1.82008 1068 c x - - + 1.39543 1.38753 1.39233 1.37008 + 0.03854 3.90584 4.03535 1.36573 0.29463 1.13682 0.38689 + 65 2.16380 2.11332 2.18714 0.42765 1073 t x - - + 1.38764 1.38471 1.38519 1.38764 + 0.03575 4.05376 4.03073 1.40080 0.28289 1.03825 0.43707 + 66 2.79349 2.39141 2.87271 0.23478 1075 t x - - + 1.37227 1.39101 1.39101 1.39101 + 0.03597 4.01447 4.05827 1.39017 0.28639 1.06429 0.42308 + 67 2.82488 2.47749 2.93179 0.21887 1078 t x - - + 1.38141 1.39112 1.38915 1.38353 + 0.03661 3.99477 4.04370 1.35958 0.29675 1.13439 0.38804 + 68 2.77679 2.30433 2.90694 0.24425 1081 t x - - + 1.37593 
1.38989 1.45520 1.32825 + 0.04447 3.68736 3.99242 1.76176 0.18843 0.98580 0.46703 + 69 2.47698 3.17398 0.19595 2.95437 1093 g x - - + 1.38264 1.38264 1.39734 1.38264 + 0.05358 3.96553 3.40487 1.40348 0.28202 1.03112 0.44100 + 70 2.84327 0.27906 2.97336 2.00890 1097 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03412 4.08811 4.08811 1.46634 0.26236 0.69006 0.69625 + 71 0.21870 2.83638 2.69251 2.65798 1098 a x - - + 1.37446 1.37942 1.39640 1.39509 + 0.03670 3.93983 4.09935 1.41905 0.27700 1.10002 0.40476 + 72 2.35233 0.46085 2.23804 1.78715 1103 c x - - + 1.38536 1.38781 1.38781 1.38421 + 0.03493 4.03822 4.09272 1.39310 0.28542 1.09638 0.40658 + 73 2.57111 0.32543 2.74124 1.98892 1105 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03381 4.09688 4.09688 1.46634 0.26236 1.09626 0.40664 + 74 0.27014 2.61416 2.53262 2.47636 1106 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03461 4.09267 4.05587 1.46634 0.26236 1.09748 0.40603 + 75 0.52873 2.16549 1.91736 1.90409 1107 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03426 4.08396 4.08396 1.46634 0.26236 1.07423 0.41788 + 76 2.33134 0.38082 2.65861 1.90055 1108 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03466 4.07266 4.07266 1.46634 0.26236 1.09861 0.40547 + 77 2.20588 0.45134 2.35553 1.84373 1109 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03550 4.04912 4.04912 1.46634 0.26236 1.09861 0.40547 + 78 2.69018 2.22054 2.82311 0.26898 1110 t x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03711 4.00561 4.00561 1.46634 0.26236 1.09861 0.40547 + 79 0.16248 3.15867 2.86159 2.98963 1111 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.04048 3.92018 3.92018 1.46634 0.26236 1.09861 0.40547 + 80 0.17484 3.04770 2.86638 2.88183 1112 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.02045 3.90014 * 1.46634 0.26236 0.00000 * +// \ No newline at end of file diff --git a/q2_types/hmmer/tests/data/hmmer/invalid_idmaps/1.hmm.idmap b/q2_types/hmmer/tests/data/invalid_idmaps/1.hmm.idmap similarity index 100% rename from 
q2_types/hmmer/tests/data/hmmer/invalid_idmaps/1.hmm.idmap rename to q2_types/hmmer/tests/data/invalid_idmaps/1.hmm.idmap diff --git a/q2_types/hmmer/tests/data/hmmer/invalid_idmaps/2.hmm.idmap b/q2_types/hmmer/tests/data/invalid_idmaps/2.hmm.idmap similarity index 100% rename from q2_types/hmmer/tests/data/hmmer/invalid_idmaps/2.hmm.idmap rename to q2_types/hmmer/tests/data/invalid_idmaps/2.hmm.idmap diff --git a/q2_types/hmmer/tests/data/hmmer/invalid_idmaps/3.hmm.idmap b/q2_types/hmmer/tests/data/invalid_idmaps/3.hmm.idmap similarity index 100% rename from q2_types/hmmer/tests/data/hmmer/invalid_idmaps/3.hmm.idmap rename to q2_types/hmmer/tests/data/invalid_idmaps/3.hmm.idmap diff --git a/q2_types/hmmer/tests/data/hmmer/invalid_idmaps/4.hmm.idmap b/q2_types/hmmer/tests/data/invalid_idmaps/4.hmm.idmap similarity index 100% rename from q2_types/hmmer/tests/data/hmmer/invalid_idmaps/4.hmm.idmap rename to q2_types/hmmer/tests/data/invalid_idmaps/4.hmm.idmap diff --git a/q2_types/hmmer/tests/test_format.py b/q2_types/hmmer/tests/test_format.py index 6672f96b..045dbf04 100644 --- a/q2_types/hmmer/tests/test_format.py +++ b/q2_types/hmmer/tests/test_format.py @@ -9,20 +9,25 @@ import shutil import os from qiime2.plugin.testing import TestPluginBase -from q2_types.hmmer._format import HmmIdmapFileFmt +from q2_types.hmmer._format import ( + HmmIdmapFileFmt, BaseHmmPressedDirFmt, AminoHmmFileFmt, DnaHmmFileFmt, + # RnaHmmFileFmt +) from qiime2.plugin import ValidationError -class TestRefFormats(TestPluginBase): - package = 'q2_types.hmm.tests' +class TestHmmFormats(TestPluginBase): + package = 'q2_types.hmmer.tests' - def test_HmmerDirFmt_valid(self): - fmt = ...(self.get_data_path("hmmer/bacteria"), 'r') + def test_HmmIdmapFileFmt_valid(self): + fmt = HmmIdmapFileFmt( + self.get_data_path("bacteria/bacteria.hmm.idmap"), 'r' + ) fmt.validate() - def test_HmmerDirFmt_invalid_idmap_1(self): - fmt = HmmIdmapFileFmt(self.get_data_path( - "hmmer/invalid_idmaps/1.hmm.idmap"), 
'r' + def test_HmmIdmapFileFmt_invalid_idmap_1(self): + fmt = HmmIdmapFileFmt( + self.get_data_path("invalid_idmaps/1.hmm.idmap"), 'r' ) with self.assertRaisesRegex( ValidationError, @@ -31,9 +36,9 @@ def test_HmmerDirFmt_invalid_idmap_1(self): ): fmt.validate(level="min") - def test_HmmerDirFmt_invalid_idmap_2(self): - fmt = HmmIdmapFileFmt(self.get_data_path( - "hmmer/invalid_idmaps/2.hmm.idmap"), 'r' + def test_HmmIdmapFileFmt_invalid_idmap_2(self): + fmt = HmmIdmapFileFmt( + self.get_data_path("invalid_idmaps/2.hmm.idmap"), 'r' ) with self.assertRaisesRegex( ValidationError, @@ -42,9 +47,9 @@ def test_HmmerDirFmt_invalid_idmap_2(self): ): fmt.validate(level="min") - def test_HmmerDirFmt_invalid_idmap_3(self): - fmt = HmmIdmapFileFmt(self.get_data_path( - "hmmer/invalid_idmaps/3.hmm.idmap"), 'r' + def test_HmmIdmapFileFmt_invalid_idmap_3(self): + fmt = HmmIdmapFileFmt( + self.get_data_path("invalid_idmaps/3.hmm.idmap"), 'r' ) with self.assertRaisesRegex( ValidationError, @@ -52,9 +57,9 @@ def test_HmmerDirFmt_invalid_idmap_3(self): ): fmt.validate(level="min") - def test_HmmerDirFmt_invalid_idmap_4(self): - fmt = HmmIdmapFileFmt(self.get_data_path( - "hmmer/invalid_idmaps/4.hmm.idmap"), 'r' + def test_HmmIdmapFileFmt_invalid_idmap_4(self): + fmt = HmmIdmapFileFmt( + self.get_data_path("invalid_idmaps/4.hmm.idmap"), 'r' ) with self.assertRaisesRegex( ValidationError, @@ -63,27 +68,39 @@ def test_HmmerDirFmt_invalid_idmap_4(self): ): fmt.validate(level="min") - def test_HmmerDirFmt_missing_hmm(self): + def test_BaseHmmPressedDirFmt_missing_hmm(self): with tempfile.TemporaryDirectory() as tmp: shutil.copytree( - self.get_data_path("hmmer/bacteria"), tmp, dirs_exist_ok=True + self.get_data_path("bacteria"), tmp, dirs_exist_ok=True ) os.remove(f"{tmp}/bacteria.hmm.h3f") - fmt = ...(tmp, 'r') + fmt = BaseHmmPressedDirFmt(tmp, 'r') with self.assertRaisesRegex( ValidationError, "Missing one or more files" ): fmt.validate(level="min") - def 
test_HmmerDirFmt_missing_fa(self): + def test_BaseHmmPressedDirFmt_missing_idmap_ok(self): with tempfile.TemporaryDirectory() as tmp: shutil.copytree( - self.get_data_path("hmmer/bacteria"), tmp, dirs_exist_ok=True + self.get_data_path("bacteria"), tmp, dirs_exist_ok=True ) - for file in ["a", "b", "b2"]: - os.remove(f"{tmp}/{file}.fa") - fmt = ...(tmp, 'r') - with self.assertRaisesRegex( - ValidationError, "Missing one or more files" - ): - fmt.validate(level="min") + os.remove(f"{tmp}/bacteria.hmm.idmap") + fmt = BaseHmmPressedDirFmt(tmp, 'r') + fmt.validate(level="min") + + def test_BaseHmmPressedDirFmt_valid(self): + fmt = BaseHmmPressedDirFmt(self.get_data_path("bacteria"), 'r') + fmt.validate(level="min") + + def test_AminoHmmFileFmt_valid(self): + fmt = AminoHmmFileFmt(self.get_data_path("hmms/amino.hmm"), "r") + fmt.validate() + + def test_DnaHmmFileFmt_valid(self): + fmt = DnaHmmFileFmt(self.get_data_path("hmms/dna.hmm"), "r") + fmt.validate() + + # def test_RnaHmmFileFmt_valid(self): + # fmt = RnaHmmFileFmt(self.get_data_path("hmms/rna.hmm"), "r") + # fmt.validate() From 6ec9360ab695d412a28fdf5e0f6b628d7476c125 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Tue, 4 Jun 2024 12:13:01 +0200 Subject: [PATCH 17/28] more tests for formats --- q2_types/hmmer/_format.py | 12 +- q2_types/hmmer/tests/data/hmms/2_dna.hmm | 348 +++++++++ q2_types/hmmer/tests/data/hmms/2_rna.hmm | 764 +++++++++++++++++++ q2_types/hmmer/tests/data/hmms/4_amino.hmm | 199 +++++ q2_types/hmmer/tests/data/hmms/amino_dna.hmm | 302 ++++++++ q2_types/hmmer/tests/data/hmms/rna.hmm | 382 ++++++++++ q2_types/hmmer/tests/data/hmms/rna_dna.hmm | 647 ++++++++++++++++ q2_types/hmmer/tests/test_format.py | 76 +- 8 files changed, 2723 insertions(+), 7 deletions(-) create mode 100644 q2_types/hmmer/tests/data/hmms/2_dna.hmm create mode 100644 q2_types/hmmer/tests/data/hmms/2_rna.hmm create mode 100644 q2_types/hmmer/tests/data/hmms/4_amino.hmm create mode 100644 
q2_types/hmmer/tests/data/hmms/amino_dna.hmm create mode 100644 q2_types/hmmer/tests/data/hmms/rna.hmm create mode 100644 q2_types/hmmer/tests/data/hmms/rna_dna.hmm diff --git a/q2_types/hmmer/_format.py b/q2_types/hmmer/_format.py index 7eb26c55..466c2495 100644 --- a/q2_types/hmmer/_format.py +++ b/q2_types/hmmer/_format.py @@ -78,7 +78,13 @@ def _validate_file_fmt( tolerance = 0.0001 with HMMFile(str(self)) as hmm_file: - hmm_profiles = list(hmm_file) + try: + hmm_profiles = list(hmm_file) + except TypeError as e: + raise ValidationError( + "Found profiles with different alphabets.\n" + f"{e}" + ) if len(hmm_profiles) > 1 and single_profile: raise ValidationError( @@ -90,8 +96,8 @@ def _validate_file_fmt( if hmm_profile.alphabet.type.lower() != alphabet: raise ValidationError( - "Found profile with alphabet: " - f"{hmm_profile.alph.lower()}\n" + "Found profile with alphabet " + f"{hmm_profile.alphabet.type.lower()}\n" f"{self.__class__} only accepts {alphabet} profiles." ) diff --git a/q2_types/hmmer/tests/data/hmms/2_dna.hmm b/q2_types/hmmer/tests/data/hmms/2_dna.hmm new file mode 100644 index 00000000..8fbebcab --- /dev/null +++ b/q2_types/hmmer/tests/data/hmms/2_dna.hmm @@ -0,0 +1,348 @@ +HMMER3/f [3.3 | Nov 2019] +NAME 3box +LENG 20 +MAXL 75 +ALPH DNA +RF yes +MM no +CONS yes +CS yes +MAP yes +DATE Thu Mar 19 09:44:43 2020 +NSEQ 22 +EFFN 22.000000 +CKSUM 1874497603 +STATS LOCAL MSV -6.0857 0.74408 +STATS LOCAL VITERBI -6.5394 0.74408 +STATS LOCAL FORWARD -3.2146 0.74408 +HMM A C G T + m->m m->i m->d i->m i->i d->m d->d + COMPO 0.85352 3.67699 1.53220 1.10043 + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 0.00000 * + 1 0.00850 6.21185 5.49634 6.05242 1 A x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 2 0.00850 6.21185 5.49634 6.05242 2 A x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 3 0.07174 5.97461 2.75317 5.82342 3 A x - . 
+ 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 4 4.57582 4.38815 1.28255 0.35675 4 t x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 5 5.04480 6.00326 0.01203 5.79333 5 G x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 6 0.00850 6.21185 5.49634 6.05242 6 A x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 7 0.16698 4.64734 5.03178 1.98292 7 a x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 8 0.27480 2.21511 3.99743 2.18242 8 a x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 9 0.81098 4.17232 4.48858 0.63691 9 t x - . + 1.38629 1.38629 1.38629 1.38629 + 0.27817 5.48894 1.43256 1.46634 0.26236 1.09861 0.40547 + 10 1.24764 1.81454 1.93201 0.90375 10 t . - . + 1.38629 1.38629 1.38629 1.38629 + 0.01086 5.22163 5.22163 1.46634 0.26236 0.15006 1.97081 + 11 2.62887 4.93134 0.09260 4.70307 11 G x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 12 5.28311 4.88969 5.40686 0.01723 12 T x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 13 2.55388 3.81030 2.68966 0.18371 13 t x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 14 5.28311 4.88969 5.40686 0.01723 14 T x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 15 5.04480 6.00326 0.01203 5.79333 15 G x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 16 0.08235 4.92354 5.26649 2.70886 16 A x - . + 2.00737 2.00737 2.00737 0.51588 + 0.09989 2.39766 5.48894 0.57943 0.82147 1.09861 0.40547 + 17 2.48998 3.46889 1.86103 0.31413 19 t x - . 
+ 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 18 5.28311 4.88969 5.40686 0.01723 20 T x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 19 0.00850 6.21185 5.49634 6.05242 21 A x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00830 5.48894 5.48894 1.46634 0.26236 1.09861 0.40547 + 20 2.00283 3.72053 0.42449 1.67812 22 g x - . + 1.38629 1.38629 1.38629 1.38629 + 0.00416 5.48480 * 1.46634 0.26236 0.00000 * +// +HMMER3/f [3.1 | February 2013] +NAME MADE1 +ACC DF0000629.2 +DESC MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon +LENG 80 +MAXL 426 +ALPH DNA +RF yes +MM no +CONS yes +CS no +MAP yes +DATE Tue Feb 19 20:33:41 2013 +NSEQ 1997 +EFFN 3.911818 +CKSUM 3015610723 +STATS LOCAL MSV -8.5786 0.71858 +STATS LOCAL VITERBI -9.3632 0.71858 +STATS LOCAL FORWARD -3.4823 0.71858 +HMM A C G T + m->m m->i m->d i->m i->i d->m d->d + COMPO 1.24257 1.59430 1.62906 1.16413 + 1.38629 1.38629 1.38629 1.38629 + 0.03960 3.94183 3.94183 1.46634 0.26236 0.00000 * + 1 2.69765 2.44396 2.81521 0.24089 1 t x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03960 3.94183 3.94183 1.46634 0.26236 1.09861 0.40547 + 2 2.72939 2.37873 2.85832 0.24244 2 t x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03725 4.00179 4.00179 1.46634 0.26236 1.09861 0.40547 + 3 0.16099 3.16370 2.87328 2.99734 3 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03604 4.03416 4.03416 1.46634 0.26236 1.09861 0.40547 + 4 1.98862 2.42132 0.42649 2.10770 4 g x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03539 4.05203 4.05203 1.46634 0.26236 1.09861 0.40547 + 5 1.96369 2.69532 0.36534 2.32099 5 g x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03764 4.06427 3.92372 1.46634 0.26236 1.09861 0.40547 + 6 2.56994 2.11239 2.71946 0.30571 6 t x - - + 1.37159 1.41129 1.39124 1.37159 + 0.03806 3.89715 4.07214 1.50442 0.25122 1.00714 0.45454 + 7 2.58388 2.10353 2.64646 0.31253 12 t x - - + 1.38764 1.38524 1.38764 1.38465 + 0.03494 4.03864 4.09125 
1.40070 0.28293 1.09237 0.40860 + 8 2.18552 2.70201 0.28821 2.64645 14 g x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03628 4.09157 3.96779 1.46634 0.26236 1.09861 0.40547 + 9 2.16916 2.82142 0.28427 2.60854 15 g x - - + 1.38091 1.39033 1.38365 1.39033 + 0.03566 4.00237 4.08886 1.38021 0.28972 1.01958 0.44745 + 10 2.45517 2.15232 2.42886 0.34277 18 t x - - + 1.39065 1.39065 1.39065 1.37335 + 0.03536 4.01212 4.09576 1.39554 0.28462 1.09775 0.40589 + 11 2.10260 2.95484 0.28160 2.64222 21 g x - - + 1.36740 1.40555 1.40555 1.36740 + 0.03843 3.92069 4.02468 1.44733 0.26814 1.09856 0.40549 + 12 2.54740 0.30185 2.61355 2.21647 26 c x - - + 1.38748 1.38276 1.38748 1.38748 + 0.03457 4.05446 4.09623 1.40847 0.28040 1.05496 0.42803 + 13 0.28443 2.72003 2.32214 2.48149 28 a x - - + 1.38740 1.38740 1.38298 1.38740 + 0.03441 4.05976 4.10001 1.41198 0.27926 1.09780 0.40587 + 14 0.29412 2.55413 2.49679 2.35701 30 a x - - + 1.38194 1.39067 1.38194 1.39067 + 0.03505 4.02482 4.10005 1.39522 0.28473 1.09929 0.40512 + 15 0.18837 2.99710 2.82270 2.77556 33 a x - - + 1.39015 1.39472 1.37503 1.38539 + 0.03725 3.97815 4.02618 1.37955 0.28994 1.10102 0.40426 + 16 0.50816 2.05151 2.22111 1.82407 37 a x - - + 1.36727 1.38730 1.39683 1.39405 + 0.04830 3.89881 3.61610 1.29026 0.32186 1.05306 0.42905 + 17 2.11260 2.73141 0.29747 2.64152 41 g x - - + 1.36913 1.40376 1.40376 1.36913 + 0.03705 3.93681 4.08299 1.44872 0.26771 1.07479 0.41759 + 18 2.24459 1.90539 2.34054 0.43234 46 t x - - + 1.33632 1.42493 1.39937 1.38665 + 0.04427 3.64574 4.06297 1.70501 0.20061 1.21309 0.35279 + 19 0.44322 2.17202 2.18055 2.03175 57 a x - - + 1.41047 1.41471 1.36338 1.35797 + 0.03970 3.81957 4.07540 1.65588 0.21186 1.22788 0.34660 + 20 0.33340 2.42691 2.40824 2.25160 66 a x - - + 1.29389 1.44615 1.37917 1.43324 + 0.04223 3.70146 4.09459 1.55158 0.23815 1.05880 0.42598 + 21 2.50563 1.98543 2.69601 0.33746 74 t x - - + 1.39462 1.39462 1.42862 1.32990 + 0.04184 3.80216 3.98177 1.80466 0.17976 1.00279 0.45705 + 22 
2.54484 1.97505 2.66483 0.33806 84 t x - - + 1.39134 1.39489 1.38662 1.37246 + 0.03877 3.97504 3.95038 1.37620 0.29107 1.13932 0.38572 + 23 2.10159 2.83856 0.29282 2.61635 88 g x - - + 1.39682 1.39682 1.35536 1.39682 + 0.05046 3.75402 3.65808 1.08330 0.41321 1.13019 0.39004 + 24 2.25298 0.61854 2.50691 1.29221 90 c x - - + 1.35803 1.49605 1.46737 1.24379 + 0.06091 3.28322 3.83564 1.89752 0.16245 1.28788 0.32276 + 25 1.27819 2.23285 0.76242 1.91259 106 g x - - + 1.29024 1.67349 1.68279 1.04597 + 0.05752 3.44263 3.73311 2.58671 0.07825 1.26818 0.33037 + 26 1.86925 2.58352 0.39466 2.33986 131 g x - - + 1.31084 1.49412 1.46666 1.29002 + 0.04698 3.54257 4.07715 2.25245 0.11109 0.86163 0.54900 + 27 2.38297 1.93394 2.39162 0.39800 151 t x - - + 1.33582 1.47359 1.44163 1.30411 + 0.04951 3.48445 4.03783 2.15951 0.12260 1.21681 0.35122 + 28 2.41717 2.17810 2.62774 0.32113 170 t x - - + 1.36805 1.48060 1.37439 1.32840 + 0.04849 3.50958 4.05014 2.58370 0.07850 1.22399 0.34822 + 29 2.57764 2.35132 2.56552 0.28512 194 t x - - + 1.43829 1.43458 1.24787 1.43829 + 0.04667 3.56670 4.05428 2.49706 0.08591 1.23744 0.34267 + 30 2.47248 2.07688 2.62257 0.33172 215 t x - - + 1.25120 1.52623 1.70635 1.15531 + 0.08932 3.31524 3.01336 2.81842 0.06156 1.22909 0.34610 + 31 2.25937 2.13157 2.02027 0.43957 248 t x - - + 1.18172 1.43522 1.72841 1.28150 + 0.07936 2.93117 3.77395 2.46269 0.08906 0.60457 0.79034 + 32 2.04508 2.84981 0.30490 2.58263 280 g x - - + 1.17665 1.66785 1.66218 1.16056 + 0.05998 3.23615 3.96853 2.83684 0.06040 1.01952 0.44749 + 33 2.45103 0.38098 2.56776 1.87147 317 c x - - + 1.24153 1.52524 1.60663 1.22783 + 0.05538 3.39046 3.90294 2.73920 0.06680 1.18729 0.36391 + 34 2.22082 0.36258 2.75077 2.02704 347 c x - - + 1.15008 1.62014 1.86511 1.10673 + 0.06086 3.18178 4.04341 2.94504 0.05403 1.25991 0.33363 + 35 0.27033 2.66664 2.52541 2.43767 388 a x - - + 1.24951 1.47565 1.41392 1.42074 + 0.07123 3.00373 3.95552 3.13655 0.04440 1.28173 0.32512 + 36 2.83107 2.41670 2.97197 
0.22235 439 t x - - + 1.37071 1.57683 1.38637 1.23972 + 0.05293 3.45216 3.91807 2.54402 0.08181 1.14651 0.38235 + 37 2.52322 2.25084 2.45909 0.31611 465 t x - - + 1.26335 1.55077 1.59008 1.19965 + 0.07504 3.13329 3.55006 3.08962 0.04659 1.13108 0.38962 + 38 0.45807 2.30687 1.98940 2.03143 512 a x - - + 1.15472 1.67511 1.53797 1.26320 + 0.09820 3.13076 2.99876 2.79197 0.06326 1.39915 0.28343 + 39 2.37471 0.42180 2.44763 1.80427 550 c x - - + 1.23785 1.49058 1.48364 1.35502 + 0.06081 3.19472 4.01643 2.41851 0.09327 0.94671 0.49105 + 40 2.32826 1.95481 2.36781 0.40458 578 t x - - + 1.36586 1.46001 1.43000 1.29720 + 0.05257 3.39673 4.03256 1.84862 0.17133 1.40979 0.27997 + 41 2.68669 2.13935 2.81520 0.28200 592 t x - - + 1.34965 1.42793 1.45781 1.31633 + 0.04735 3.57826 3.99988 2.09424 0.13144 1.22129 0.34934 + 42 2.55904 2.16444 2.70859 0.29952 609 t x - - + 1.12072 1.61936 1.63578 1.26895 + 0.07346 3.25910 3.42962 2.85641 0.05919 1.38363 0.28857 + 43 1.99923 1.61027 2.26343 0.57851 646 t x - - + 1.32290 1.58747 1.61095 1.11018 + 0.06656 3.08568 3.97944 2.44774 0.09046 0.75593 0.63407 + 44 0.23887 2.79899 2.55209 2.60783 675 a x - - + 1.18557 1.50323 1.59070 1.31590 + 0.05597 3.38637 3.88222 2.46900 0.08847 1.27945 0.32599 + 45 0.29593 2.53488 2.53903 2.32335 701 a x - - + 1.08710 1.54222 1.59276 1.40430 + 0.07539 2.94521 3.91062 1.91623 0.15918 1.22327 0.34852 + 46 2.58352 2.40524 2.76700 0.25955 725 t x - - + 1.19685 1.58503 1.74852 1.14293 + 0.06124 3.18279 4.02089 2.82961 0.06085 1.05474 0.42814 + 47 2.13251 2.88788 0.29508 2.50964 764 g x - - + 1.20891 1.55463 1.68206 1.19000 + 0.06526 3.12574 3.94910 2.41448 0.09367 1.10396 0.40280 + 48 2.23841 2.99164 0.25118 2.72900 792 g x - - + 1.26330 1.55339 1.52606 1.24355 + 0.05464 3.34968 4.01313 2.78872 0.06347 1.15133 0.38012 + 49 2.57533 0.32900 2.64632 2.01501 824 c x - - + 1.35118 1.39828 1.40141 1.39516 + 0.04340 3.79297 3.91506 1.59549 0.22666 1.20075 0.35806 + 50 0.46433 2.04127 2.23437 2.00605 833 a x - - + 
1.23062 1.36903 1.62282 1.36182 + 0.05764 3.31530 3.92762 2.28791 0.10700 1.07910 0.41536 + 51 0.27513 2.77017 2.28518 2.57549 853 a x - - + 1.27958 1.58726 1.46109 1.25394 + 0.05750 3.30072 3.96214 2.60776 0.07656 1.25708 0.33475 + 52 0.20149 2.86434 2.84551 2.69770 883 a x - - + 1.23645 1.62259 1.71174 1.10368 + 0.05756 3.26729 4.02702 2.54508 0.08172 1.27391 0.32814 + 53 0.26982 2.65833 2.50477 2.46835 911 a x - - + 1.36005 1.50358 1.48100 1.22550 + 0.06921 3.37553 3.42118 2.36646 0.09851 1.27560 0.32748 + 54 0.40022 2.19284 2.22687 2.20396 934 a x - - + 1.12070 1.60472 1.53213 1.35895 + 0.05523 3.36752 3.94966 2.42917 0.09224 0.84774 0.55928 + 55 2.11356 0.46400 2.46442 1.79955 960 c x - - + 1.23932 1.35913 1.50478 1.46331 + 0.05187 3.47055 3.94022 2.35854 0.09933 1.12102 0.39445 + 56 1.85868 0.79440 2.22069 1.25971 983 c x - - + 1.21951 1.50212 1.51138 1.34185 + 0.06404 3.29054 3.69705 1.75742 0.18933 1.18410 0.36532 + 57 1.33272 2.32720 0.71452 1.90215 999 g x - - + 1.12229 1.49343 1.56653 1.42255 + 0.04920 3.46654 4.08749 2.17995 0.11996 1.31769 0.31164 + 58 2.48337 0.43652 2.46331 1.68683 1017 c x - - + 1.34704 1.55461 1.38112 1.28222 + 0.04823 3.61532 3.90311 2.20911 0.11631 1.00864 0.45368 + 59 0.41659 2.44509 1.93972 2.20507 1034 a x - - + 1.38198 1.38198 1.39194 1.38932 + 0.03641 3.98130 4.06929 1.35873 0.29704 1.31330 0.31325 + 60 0.41612 2.39160 1.97116 2.21075 1037 a x - - + 1.03649 1.46430 1.57421 1.57557 + 0.04769 3.52580 4.06641 2.32461 0.10294 0.84329 0.56263 + 61 2.66264 2.12302 2.82746 0.28581 1056 t x - - + 1.36925 1.39635 1.38930 1.39048 + 0.04097 3.97400 3.84718 1.39433 0.28502 1.12205 0.39395 + 62 2.26510 2.13196 2.42551 0.37231 1060 t x - - + 1.37965 1.39147 1.39147 1.38264 + 0.04082 3.91610 3.90805 1.24613 0.33914 0.95192 0.48776 + 63 0.41244 2.25761 2.16787 2.12907 1062 a x - - + 1.34515 1.41203 1.41203 1.37753 + 0.04054 3.77835 4.08203 1.30483 0.31638 1.11819 0.39582 + 64 2.51464 0.37905 2.62296 1.82008 1068 c x - - + 1.39543 1.38753 
1.39233 1.37008 + 0.03854 3.90584 4.03535 1.36573 0.29463 1.13682 0.38689 + 65 2.16380 2.11332 2.18714 0.42765 1073 t x - - + 1.38764 1.38471 1.38519 1.38764 + 0.03575 4.05376 4.03073 1.40080 0.28289 1.03825 0.43707 + 66 2.79349 2.39141 2.87271 0.23478 1075 t x - - + 1.37227 1.39101 1.39101 1.39101 + 0.03597 4.01447 4.05827 1.39017 0.28639 1.06429 0.42308 + 67 2.82488 2.47749 2.93179 0.21887 1078 t x - - + 1.38141 1.39112 1.38915 1.38353 + 0.03661 3.99477 4.04370 1.35958 0.29675 1.13439 0.38804 + 68 2.77679 2.30433 2.90694 0.24425 1081 t x - - + 1.37593 1.38989 1.45520 1.32825 + 0.04447 3.68736 3.99242 1.76176 0.18843 0.98580 0.46703 + 69 2.47698 3.17398 0.19595 2.95437 1093 g x - - + 1.38264 1.38264 1.39734 1.38264 + 0.05358 3.96553 3.40487 1.40348 0.28202 1.03112 0.44100 + 70 2.84327 0.27906 2.97336 2.00890 1097 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03412 4.08811 4.08811 1.46634 0.26236 0.69006 0.69625 + 71 0.21870 2.83638 2.69251 2.65798 1098 a x - - + 1.37446 1.37942 1.39640 1.39509 + 0.03670 3.93983 4.09935 1.41905 0.27700 1.10002 0.40476 + 72 2.35233 0.46085 2.23804 1.78715 1103 c x - - + 1.38536 1.38781 1.38781 1.38421 + 0.03493 4.03822 4.09272 1.39310 0.28542 1.09638 0.40658 + 73 2.57111 0.32543 2.74124 1.98892 1105 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03381 4.09688 4.09688 1.46634 0.26236 1.09626 0.40664 + 74 0.27014 2.61416 2.53262 2.47636 1106 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03461 4.09267 4.05587 1.46634 0.26236 1.09748 0.40603 + 75 0.52873 2.16549 1.91736 1.90409 1107 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03426 4.08396 4.08396 1.46634 0.26236 1.07423 0.41788 + 76 2.33134 0.38082 2.65861 1.90055 1108 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03466 4.07266 4.07266 1.46634 0.26236 1.09861 0.40547 + 77 2.20588 0.45134 2.35553 1.84373 1109 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03550 4.04912 4.04912 1.46634 0.26236 1.09861 0.40547 + 78 2.69018 2.22054 2.82311 0.26898 1110 t x - - + 1.38629 1.38629 1.38629 
1.38629 + 0.03711 4.00561 4.00561 1.46634 0.26236 1.09861 0.40547 + 79 0.16248 3.15867 2.86159 2.98963 1111 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.04048 3.92018 3.92018 1.46634 0.26236 1.09861 0.40547 + 80 0.17484 3.04770 2.86638 2.88183 1112 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.02045 3.90014 * 1.46634 0.26236 0.00000 * +// \ No newline at end of file diff --git a/q2_types/hmmer/tests/data/hmms/2_rna.hmm b/q2_types/hmmer/tests/data/hmms/2_rna.hmm new file mode 100644 index 00000000..eb33ee5a --- /dev/null +++ b/q2_types/hmmer/tests/data/hmms/2_rna.hmm @@ -0,0 +1,764 @@ +HMMER3/f [3.1b2 | February 2015] +NAME 5S_rRNA +ACC RF00001 +DESC 5S ribosomal RNA +LENG 119 +MAXL 305 +ALPH RNA +RF yes +MM no +CONS yes +CS yes +MAP yes +DATE Sun Feb 19 15:28:58 2017 +NSEQ 712 +EFFN 9.307953 +CKSUM 242171328 +STATS LOCAL MSV -9.2495 0.71806 +STATS LOCAL VITERBI -10.6521 0.71806 +STATS LOCAL FORWARD -3.9747 0.71806 +HMM A C G U + m->m m->i m->d i->m i->i d->m d->d + COMPO 1.44549 1.37725 1.25977 1.47676 + 1.43153 1.34302 1.34302 1.43153 + 0.07274 4.09044 2.92951 1.07081 0.41966 0.00000 * + 1 1.54335 1.70220 1.04746 1.37348 3 g g - ( + 1.38629 1.38629 1.38629 1.38629 + 0.02796 4.69986 3.99134 1.46634 0.26236 0.98356 0.46837 + 2 1.69437 1.14964 1.46606 1.31416 4 c c - ( + 1.38629 1.38629 1.38629 1.38629 + 0.01877 4.70906 4.64822 1.46634 0.26236 1.12704 0.39155 + 3 1.25252 1.38627 1.88263 1.16469 5 u c - ( + 1.38629 1.38629 1.38629 1.38629 + 0.01965 4.72024 4.55191 1.46634 0.26236 0.80895 0.58937 + 4 1.72700 2.07424 1.11856 0.99484 6 u u - ( + 1.38629 1.38629 1.38629 1.38629 + 0.01870 4.73297 4.63281 1.46634 0.26236 0.66621 0.72083 + 5 1.54450 2.37555 0.52987 2.25440 7 g G - ( + 1.38629 1.38629 1.38629 1.38629 + 0.02087 4.74329 4.42737 1.46634 0.26236 1.07337 0.41833 + 6 2.71222 0.55655 2.16572 1.40340 8 c c - ( + 1.46123 1.34104 1.46123 1.29271 + 0.04754 3.32700 4.55416 0.43093 1.04954 1.06781 0.42123 + 7 1.88183 3.18268 0.27858 3.00864 10 g g - ( + 1.38629 1.38629 
1.38629 1.38629 + 0.02914 4.74112 3.91233 1.46634 0.26236 1.10199 0.40378 + 8 1.21294 1.87108 0.91371 1.91267 11 g g - ( + 1.42544 1.35183 1.34543 1.42544 + 0.03222 3.77698 4.73128 0.64731 0.74118 1.45006 0.26730 + 9 2.92783 0.67317 2.82760 0.97486 13 c c - ( + 1.39851 1.35052 1.39851 1.39851 + 0.02477 4.33224 4.48084 1.04107 0.43553 1.51536 0.24811 + 10 2.18357 0.54013 1.84964 1.91465 15 c C - , + 1.37342 1.39919 1.39919 1.37371 + 0.02250 4.31140 4.72954 1.02501 0.44440 1.36766 0.29397 + 11 0.38365 2.42629 2.19201 2.13222 17 a A - , + 1.38629 1.38629 1.38629 1.38629 + 0.04294 4.73158 3.40456 1.46634 0.26236 0.62352 0.76799 + 12 2.20731 2.05846 2.30257 0.41196 18 u U - , + 1.39935 1.39935 1.34812 1.39935 + 0.02367 4.29767 4.62666 1.02132 0.44647 1.64817 0.21369 + 13 0.46807 2.44693 1.91775 1.96401 20 a A - , + 1.38629 1.38629 1.38629 1.38629 + 0.01893 4.72039 4.62082 1.46634 0.26236 0.41681 1.07631 + 14 2.55473 0.83543 1.03807 2.00646 21 c c - < + 1.38629 1.38629 1.38629 1.38629 + 0.01755 4.74471 4.74471 1.46634 0.26236 1.13291 0.38875 + 15 2.39320 0.46353 2.32167 1.70653 22 c c - < + 1.45283 1.09532 1.57794 1.49050 + 0.05776 3.04853 4.74471 0.87955 0.53609 1.03335 0.43977 + 16 0.99836 2.22160 1.07887 1.69771 27 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 17 1.65807 1.64143 0.81511 1.75330 28 g g - < + 1.32750 1.40669 1.40669 1.40669 + 0.02485 4.14453 4.74562 0.87729 0.53770 1.09861 0.40547 + 18 1.74878 1.07763 1.33154 1.50715 30 c c - < + 1.36832 1.40719 1.40719 1.36333 + 0.02503 4.13325 4.74562 0.86889 0.54372 1.09861 0.40547 + 19 1.11114 1.72950 1.24664 1.58000 32 a g - < + 1.36453 1.37249 1.40441 1.40441 + 0.02402 4.19685 4.74562 0.91728 0.51017 1.09861 0.40547 + 20 1.75962 1.21469 1.33724 1.31484 34 c c - < + 1.34930 1.39894 1.39894 1.39894 + 0.02204 4.33423 4.74562 1.03089 0.44113 1.09861 0.40547 + 21 2.10367 2.26221 0.69525 1.29127 36 g g - < + 1.23194 1.49181 1.35346 1.49181 + 0.09167 3.83584 2.71797 
1.21903 0.35029 1.09861 0.40547 + 22 1.08122 2.07379 0.96621 1.86701 40 g a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01861 4.68660 4.68660 1.46634 0.26236 0.30116 1.34692 + 23 0.65069 1.92638 2.07928 1.57205 41 a A - - + 1.25691 1.56831 1.56831 1.20840 + 0.05734 3.06802 4.68691 2.01181 0.14358 0.97567 0.47311 + 24 0.77013 1.57188 2.38564 1.43817 52 a a - - + 1.40004 1.40004 1.34615 1.40004 + 0.02963 4.30295 4.15605 1.00560 0.45543 0.96359 0.48049 + 25 1.08270 1.81905 0.98134 2.08480 54 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01935 4.73865 4.56477 1.46634 0.26236 0.78701 0.60734 + 26 2.77791 0.32597 3.23107 1.73450 55 c c - < + 1.37317 1.36923 1.40162 1.40162 + 0.02306 4.26226 4.74336 0.97163 0.47557 1.03813 0.43713 + 27 0.59716 2.50150 1.63951 1.75105 57 a A - - + 1.68144 1.28909 1.43347 1.20433 + 0.06658 3.11247 3.91592 2.07020 0.13486 1.13318 0.38862 + 28 3.45772 0.15214 3.28331 2.62934 68 c C - - + 1.24691 1.52452 1.28843 1.51786 + 0.06417 3.05117 4.20964 0.68732 0.69901 0.60688 0.78756 + 29 2.54112 0.43659 2.14754 1.84379 71 c c - < + 1.38629 1.38629 1.38629 1.38629 + 0.02064 4.73962 4.44910 1.46634 0.26236 1.17883 0.36765 + 30 1.75760 1.28076 1.06455 1.58555 72 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01767 4.73785 4.73785 1.46634 0.26236 0.83716 0.56729 + 31 2.45757 2.10516 0.32641 2.64477 73 g G - < + 1.38629 1.61608 1.38629 1.19957 + 0.04126 3.46755 4.68520 1.40188 0.28254 1.16513 0.37379 + 32 0.68847 2.43846 1.70166 1.47850 78 a a - < + 1.39953 1.34760 1.39953 1.39953 + 0.04835 4.31604 3.38601 1.01709 0.44886 1.18379 0.36545 + 33 2.02827 2.44383 2.92174 0.31777 80 u u - < + 1.38629 1.38629 1.38629 1.38629 + 0.01803 4.71785 4.71785 1.46634 0.26236 1.72111 0.19707 + 34 3.56015 0.19950 3.88725 2.02556 81 c C - < + 1.35718 1.41983 1.23310 1.56316 + 0.05788 3.17887 4.22665 1.23421 0.34399 1.80037 0.18061 + 35 2.77743 0.33393 2.97305 1.76872 86 c C - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01812 4.71305 4.71305 1.46634 0.26236 1.40817 0.28050 + 36 
3.98420 0.10391 3.40107 3.06287 87 C C - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01801 4.71875 4.71875 1.46634 0.26236 1.80037 0.18061 + 37 0.60764 2.71262 1.22750 2.34366 88 a A - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01801 4.71875 4.71875 1.46634 0.26236 1.80037 0.18061 + 38 3.23088 3.18734 3.87694 0.10705 89 u U - _ + 1.38629 1.38629 1.38629 1.38629 + 0.03159 4.71875 3.80896 1.46634 0.26236 1.74175 0.19263 + 39 2.16110 0.61914 2.26925 1.41468 90 c C - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01825 4.70596 4.70596 1.46634 0.26236 1.93795 0.15548 + 40 1.23660 0.86177 2.56859 1.55791 91 c c - _ + 1.77144 0.52247 2.28261 2.00365 + 0.48567 0.98086 4.63299 0.38856 1.13330 1.09154 0.40902 + 41 2.95007 4.22451 0.08802 4.05756 99 G G - > + 1.38629 1.38629 1.38629 1.38629 + 0.01855 4.71928 4.66066 1.46634 0.26236 1.79035 0.18260 + 42 0.19851 2.48110 3.36603 2.78270 100 a a - > + 1.40690 1.32690 1.40690 1.40690 + 0.02561 4.11282 4.71875 0.87369 0.54027 0.56674 0.83787 + 43 0.56275 2.33321 2.68997 1.32616 102 a A - - + 1.38629 1.38629 1.38629 1.38629 + 0.01763 4.74024 4.74024 1.46634 0.26236 1.28599 0.32349 + 44 3.39375 0.12438 4.17735 2.68773 103 c C - - + 1.38629 1.38629 1.38629 1.38629 + 0.01763 4.74024 4.74024 1.46634 0.26236 0.98476 0.46765 + 45 1.56019 1.81908 2.57774 0.59460 104 u u - > + 1.38629 1.38629 1.38629 1.38629 + 0.01757 4.74330 4.74330 1.46634 0.26236 1.12516 0.39245 + 46 2.56361 0.38721 1.86761 2.41311 105 c C - > + 1.38629 1.38629 1.38629 1.38629 + 0.01756 4.74383 4.74383 1.46634 0.26236 1.16513 0.37379 + 47 1.65610 0.99848 1.23540 1.89738 106 c c - > + 1.39034 1.39806 1.42179 1.33693 + 0.03045 3.84973 4.74383 0.68205 0.70437 0.97754 0.47198 + 48 2.00597 3.06134 0.26182 3.01610 108 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.02119 4.74562 4.39977 1.46634 0.26236 1.09861 0.40547 + 49 0.58998 1.58170 2.46808 1.86247 109 a A - - + 1.46483 1.46483 1.42461 1.21347 + 0.04703 3.29086 4.74203 0.41677 1.07638 0.88179 0.53450 + 50 0.30089 2.06397 2.82571 2.60877 
111 a A - - + 1.38629 1.38629 1.38629 1.38629 + 0.01853 4.74562 4.63915 1.46634 0.26236 1.09861 0.40547 + 51 2.65105 3.53770 0.14206 3.41803 112 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01755 4.74465 4.74465 1.46634 0.26236 1.13538 0.38758 + 52 2.25374 1.86995 2.96108 0.37237 113 u u - > + 1.38629 1.38629 1.38629 1.38629 + 0.02119 4.74465 4.40076 1.46634 0.26236 1.13538 0.38758 + 53 2.30451 2.06868 1.68368 0.53077 114 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.01761 4.74107 4.74107 1.46634 0.26236 1.15277 0.37945 + 54 0.07177 3.47955 4.08658 3.83347 115 A A - - + 1.38629 1.38629 1.38629 1.38629 + 0.01760 4.74205 4.74205 1.46634 0.26236 1.22683 0.34703 + 55 0.07030 3.55771 4.11216 3.77171 116 A A - - + 1.35190 1.39803 1.39803 1.39803 + 0.02925 4.35534 4.13613 1.05273 0.42922 1.22683 0.34703 + 56 1.71014 3.01237 0.32835 2.99827 118 g G - - + 1.85597 1.60694 0.90297 1.43618 + 0.08251 2.66512 4.64522 2.63460 0.07445 0.63738 0.75221 + 57 2.17286 0.32500 2.86949 2.23591 136 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.01855 4.74481 4.63738 1.46634 0.26236 1.03982 0.43621 + 58 1.54608 1.38572 1.03143 1.71325 137 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01755 4.74464 4.74464 1.46634 0.26236 1.02829 0.44257 + 59 1.73179 1.41448 1.67883 0.93296 138 u c - > + 1.38629 1.38629 1.38629 1.38629 + 0.01820 4.74562 4.67277 1.46634 0.26236 1.09861 0.40547 + 60 1.88051 1.48253 0.89529 1.55148 139 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01802 4.74497 4.69262 1.46634 0.26236 1.05064 0.43034 + 61 1.83989 0.89343 1.57228 1.49458 140 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.02062 4.74516 4.44689 1.46634 0.26236 1.06402 0.42322 + 62 1.95525 1.32639 2.27770 0.71229 141 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.01759 4.74260 4.74260 1.46634 0.26236 0.90996 0.51507 + 63 1.17195 1.68298 2.18999 0.93522 142 u u - - + 1.42850 1.37453 1.30763 1.44006 + 0.04330 3.39069 4.74562 0.45444 1.00732 1.09861 0.40547 + 64 2.06137 2.80967 0.29508 2.68794 144 g g - > + 1.39830 
1.35112 1.39830 1.39830 + 0.02181 4.35145 4.74562 1.04608 0.43281 1.09861 0.40547 + 65 2.09444 1.16363 0.78407 2.22593 146 g g - > + 1.36526 1.42987 1.32423 1.42987 + 0.02827 3.99693 4.65608 0.93847 0.49631 1.09861 0.40547 + 66 2.69864 3.10988 0.17720 2.98611 149 g g - < + 1.44804 1.49525 1.55952 1.10703 + 0.04988 3.21982 4.74481 0.88506 0.53220 1.03982 0.43621 + 67 2.75316 0.33473 2.45264 2.00485 153 c C - < + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 68 2.10133 1.01237 1.16670 1.59476 154 c c - < + 1.35433 1.34384 1.40173 1.44874 + 0.03127 3.81223 4.74562 0.88206 0.53432 1.09861 0.40547 + 69 1.06718 1.90576 1.25097 1.50920 157 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 70 1.17832 2.48536 0.67247 2.31807 158 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 71 1.12397 2.13330 1.34852 1.21421 159 a g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 72 1.92362 1.97652 0.76090 1.39381 160 g g - - + 1.38629 1.38629 1.38629 1.38629 + 0.19341 4.74562 1.78878 1.46634 0.26236 1.09861 0.40547 + 73 1.49538 1.94718 2.24044 0.64103 161 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.19486 4.57308 1.79140 1.46634 0.26236 3.05575 0.04823 + 74 0.45457 2.40448 2.17182 1.82635 162 a A - - + 1.25913 1.51334 1.51334 1.28825 + 0.08684 2.64599 4.40301 0.39629 1.11722 0.32782 1.27474 + 75 2.93299 3.01855 0.13952 3.57151 165 g G - - + 1.38629 1.38629 1.38629 1.38629 + 0.01888 4.67199 4.67199 1.46634 0.26236 0.20256 1.69628 + 76 3.99358 2.92642 4.19781 0.09107 166 U U - < + 1.38629 1.38629 1.38629 1.38629 + 0.01754 4.74521 4.74521 1.46634 0.26236 1.11450 0.39761 + 77 0.11617 3.11470 3.66144 3.22913 167 a A - - + 1.38629 1.38629 1.38629 1.38629 + 0.03608 4.74521 3.62152 1.46634 0.26236 1.11450 0.39761 + 78 2.84125 0.35510 1.85871 2.46899 168 c c - < + 1.38629 1.38629 1.38629 1.38629 + 
0.01843 4.72699 4.66629 1.46634 0.26236 1.63398 0.21710 + 79 3.31819 2.37733 3.05633 0.19368 169 u u - < + 1.38629 1.38629 1.38629 1.38629 + 0.02382 4.72644 4.22112 1.46634 0.26236 1.34190 0.30293 + 80 1.03409 2.18770 1.15649 1.52470 170 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01792 4.72376 4.72376 1.46634 0.26236 1.06120 0.42471 + 81 1.86872 1.58634 0.86303 1.51811 171 g g - - + 1.38629 1.38629 1.38629 1.38629 + 0.02599 4.73172 4.08388 1.46634 0.26236 1.33266 0.30622 + 82 1.78916 2.52942 0.37772 2.69164 172 g G - - + 1.38629 1.38629 1.38629 1.38629 + 0.14347 4.72552 2.08118 1.46634 0.26236 1.35133 0.29961 + 83 0.89061 2.00611 1.13404 2.01474 173 a a - - + 1.34036 1.40209 1.40209 1.40209 + 0.16534 4.10702 1.99558 1.13868 0.38602 1.47891 0.25862 + 84 2.50495 2.45089 2.96402 0.24783 176 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.04231 4.50624 3.49387 1.46634 0.26236 0.12430 2.14660 + 85 1.96114 2.03452 0.46600 2.29211 177 g G - < + 1.38629 1.38629 1.38629 1.38629 + 0.01800 4.71951 4.71951 1.46634 0.26236 1.78612 0.18345 + 86 1.96207 1.76997 0.59462 1.98542 178 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.06564 4.71951 2.90743 1.46634 0.26236 1.68897 0.20421 + 87 2.48356 3.14584 0.23004 2.53801 179 g G - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01885 4.67369 4.67369 1.46634 0.26236 1.45718 0.26513 + 88 1.55291 1.80348 1.69102 0.82254 180 u u - _ + 1.38629 1.38629 1.38629 1.38629 + 0.02185 4.68814 4.38920 1.46634 0.26236 0.91915 0.50892 + 89 1.98709 2.38814 0.43269 2.10089 181 g G - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01940 4.71010 4.58486 1.46634 0.26236 1.66421 0.20991 + 90 0.48856 2.23554 2.05100 1.89081 182 a A - _ + 1.38629 1.38629 1.38629 1.38629 + 0.04725 4.71185 3.29255 1.46634 0.26236 0.71674 0.67010 + 91 2.08360 0.69276 1.61378 1.73617 183 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.02481 4.20562 4.64669 0.95696 0.48460 1.07845 0.41570 + 92 2.58338 0.49017 1.71456 2.02558 185 c C - > + 1.41519 1.37531 1.38981 1.36557 + 0.05835 3.94245 3.28929 
0.75530 0.63463 1.29897 0.31857 + 93 1.44648 1.55255 1.47019 1.13006 187 u u - - + 1.38629 1.38629 1.38629 1.38629 + 0.02238 4.69570 4.34314 1.46634 0.26236 0.91028 0.51486 + 94 2.44101 0.55901 2.21621 1.46046 188 c C - - + 1.38629 1.38629 1.38629 1.38629 + 0.01922 4.71416 4.59873 1.46634 0.26236 1.56388 0.23486 + 95 1.61020 0.94090 1.43457 1.76225 189 c c - - + 1.38629 1.38629 1.38629 1.38629 + 0.03993 4.71641 3.50002 1.46634 0.26236 1.58461 0.22945 + 96 2.18875 1.48910 2.33334 0.57023 190 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.03318 4.69762 3.74983 1.46634 0.26236 1.94079 0.15501 + 97 2.22832 2.76588 0.27337 2.68011 191 g G - - + 1.39934 1.39934 1.39934 1.34815 + 0.03844 4.26291 3.74534 1.02156 0.44634 0.26192 1.46781 + 98 2.23154 2.13562 0.42666 2.10555 193 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01781 4.72977 4.72977 1.46634 0.26236 1.57070 0.23307 + 99 2.25826 3.31778 0.19553 3.30137 194 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01781 4.72977 4.72977 1.46634 0.26236 0.55476 0.85381 + 100 0.12870 3.11387 3.74780 2.94195 195 a A - - + 1.49330 1.35303 1.35303 1.35303 + 0.03095 3.82658 4.74521 1.21784 0.35079 1.11450 0.39761 + 101 0.52168 2.77619 1.31025 2.59762 199 a A - > + 1.42051 1.37646 1.33049 1.42051 + 0.50426 3.27205 1.02688 0.50867 0.91954 1.11450 0.39761 + 102 1.75707 1.65310 1.00247 1.31295 202 g g - - + 1.38629 1.38629 1.38629 1.38629 + 0.02709 4.31530 4.31530 1.46634 0.26236 0.06230 2.80676 + 103 0.98530 1.54075 2.13641 1.22290 203 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01763 4.74029 4.74029 1.46634 0.26236 0.80681 0.59109 + 104 1.91716 0.92422 1.32032 1.66554 204 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 105 2.25576 0.81533 2.15099 1.08962 205 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.03272 4.74562 3.75059 1.46634 0.26236 1.09861 0.40547 + 106 1.07692 1.79642 1.75375 1.13836 206 a a - - + 1.47073 1.41139 1.36911 1.30156 + 0.05759 3.16375 4.29051 0.98783 0.46583 
0.55355 0.85544 + 107 1.77856 1.36913 0.90724 1.75358 211 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01762 4.74081 4.74081 1.46634 0.26236 1.26779 0.33052 + 108 2.13110 2.71915 0.28126 2.80460 212 g G - > + 1.37506 1.46572 1.39000 1.31980 + 0.04786 3.28070 4.69626 0.41342 1.08289 1.26779 0.33052 + 109 1.62498 2.09339 1.94510 0.62205 214 u u - > + 1.67585 0.77934 1.88169 1.60043 + 0.48638 0.97901 4.65939 0.06094 2.82815 0.84034 0.56486 + 110 1.68495 3.43968 0.29793 3.21597 217 g g - ) + 1.40521 1.33160 1.40521 1.40521 + 0.03979 4.17697 3.74368 0.90276 0.51995 1.14102 0.38492 + 111 2.20670 1.02757 2.06911 0.90199 219 u c - ) + 1.38629 1.38629 1.38629 1.38629 + 0.03044 4.72941 3.85623 1.46634 0.26236 0.89025 0.52857 + 112 2.56171 1.14811 2.54620 0.64015 220 u c - ) + 1.22072 1.44819 1.44819 1.44819 + 0.03025 3.90293 4.64488 1.01008 0.45286 1.57949 0.23077 + 113 2.42825 3.31342 0.17401 3.34863 223 g g - ) + 1.37346 1.36856 1.40182 1.40182 + 0.02545 4.23975 4.53557 0.96752 0.47807 0.78017 0.61309 + 114 2.28462 0.62468 2.31949 1.33017 225 c C - ) + 1.38629 1.38629 1.38629 1.38629 + 0.01814 4.73640 4.68744 1.46634 0.26236 1.26908 0.33002 + 115 0.95503 1.17607 1.96286 1.79422 226 a a - ) + 1.38629 1.38629 1.38629 1.38629 + 0.02255 4.73719 4.30270 1.46634 0.26236 1.31072 0.31420 + 116 1.24426 1.94348 1.35080 1.17245 227 u g - ) + 1.38629 1.38629 1.38629 1.38629 + 0.03289 4.73302 3.74836 1.46634 0.26236 1.49088 0.25512 + 117 1.61067 1.83596 1.13930 1.13712 228 u g - ) + 1.38629 1.38629 1.38629 1.38629 + 0.03308 4.71816 3.74632 1.46634 0.26236 1.68263 0.20565 + 118 1.74114 0.93694 1.54031 1.52079 229 c c - ) + 1.38629 1.38629 1.38629 1.38629 + 0.10437 4.70462 2.40738 1.46634 0.26236 2.03123 0.14061 + 119 1.80143 0.85215 2.05176 1.27320 230 c c - : + 1.38629 1.38629 1.38629 1.38629 + 0.01000 4.61025 * 1.46634 0.26236 0.00000 * +// +HMMER3/f [3.1b2 | February 2015] +NAME 5S_rRNA +ACC RF00001 +DESC 5S ribosomal RNA +LENG 119 +MAXL 305 +ALPH RNA +RF yes +MM no +CONS yes +CS 
yes +MAP yes +DATE Sun Feb 19 15:28:58 2017 +NSEQ 712 +EFFN 9.307953 +CKSUM 242171328 +STATS LOCAL MSV -9.2495 0.71806 +STATS LOCAL VITERBI -10.6521 0.71806 +STATS LOCAL FORWARD -3.9747 0.71806 +HMM A C G U + m->m m->i m->d i->m i->i d->m d->d + COMPO 1.44549 1.37725 1.25977 1.47676 + 1.43153 1.34302 1.34302 1.43153 + 0.07274 4.09044 2.92951 1.07081 0.41966 0.00000 * + 1 1.54335 1.70220 1.04746 1.37348 3 g g - ( + 1.38629 1.38629 1.38629 1.38629 + 0.02796 4.69986 3.99134 1.46634 0.26236 0.98356 0.46837 + 2 1.69437 1.14964 1.46606 1.31416 4 c c - ( + 1.38629 1.38629 1.38629 1.38629 + 0.01877 4.70906 4.64822 1.46634 0.26236 1.12704 0.39155 + 3 1.25252 1.38627 1.88263 1.16469 5 u c - ( + 1.38629 1.38629 1.38629 1.38629 + 0.01965 4.72024 4.55191 1.46634 0.26236 0.80895 0.58937 + 4 1.72700 2.07424 1.11856 0.99484 6 u u - ( + 1.38629 1.38629 1.38629 1.38629 + 0.01870 4.73297 4.63281 1.46634 0.26236 0.66621 0.72083 + 5 1.54450 2.37555 0.52987 2.25440 7 g G - ( + 1.38629 1.38629 1.38629 1.38629 + 0.02087 4.74329 4.42737 1.46634 0.26236 1.07337 0.41833 + 6 2.71222 0.55655 2.16572 1.40340 8 c c - ( + 1.46123 1.34104 1.46123 1.29271 + 0.04754 3.32700 4.55416 0.43093 1.04954 1.06781 0.42123 + 7 1.88183 3.18268 0.27858 3.00864 10 g g - ( + 1.38629 1.38629 1.38629 1.38629 + 0.02914 4.74112 3.91233 1.46634 0.26236 1.10199 0.40378 + 8 1.21294 1.87108 0.91371 1.91267 11 g g - ( + 1.42544 1.35183 1.34543 1.42544 + 0.03222 3.77698 4.73128 0.64731 0.74118 1.45006 0.26730 + 9 2.92783 0.67317 2.82760 0.97486 13 c c - ( + 1.39851 1.35052 1.39851 1.39851 + 0.02477 4.33224 4.48084 1.04107 0.43553 1.51536 0.24811 + 10 2.18357 0.54013 1.84964 1.91465 15 c C - , + 1.37342 1.39919 1.39919 1.37371 + 0.02250 4.31140 4.72954 1.02501 0.44440 1.36766 0.29397 + 11 0.38365 2.42629 2.19201 2.13222 17 a A - , + 1.38629 1.38629 1.38629 1.38629 + 0.04294 4.73158 3.40456 1.46634 0.26236 0.62352 0.76799 + 12 2.20731 2.05846 2.30257 0.41196 18 u U - , + 1.39935 1.39935 1.34812 1.39935 + 0.02367 4.29767 
4.62666 1.02132 0.44647 1.64817 0.21369 + 13 0.46807 2.44693 1.91775 1.96401 20 a A - , + 1.38629 1.38629 1.38629 1.38629 + 0.01893 4.72039 4.62082 1.46634 0.26236 0.41681 1.07631 + 14 2.55473 0.83543 1.03807 2.00646 21 c c - < + 1.38629 1.38629 1.38629 1.38629 + 0.01755 4.74471 4.74471 1.46634 0.26236 1.13291 0.38875 + 15 2.39320 0.46353 2.32167 1.70653 22 c c - < + 1.45283 1.09532 1.57794 1.49050 + 0.05776 3.04853 4.74471 0.87955 0.53609 1.03335 0.43977 + 16 0.99836 2.22160 1.07887 1.69771 27 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 17 1.65807 1.64143 0.81511 1.75330 28 g g - < + 1.32750 1.40669 1.40669 1.40669 + 0.02485 4.14453 4.74562 0.87729 0.53770 1.09861 0.40547 + 18 1.74878 1.07763 1.33154 1.50715 30 c c - < + 1.36832 1.40719 1.40719 1.36333 + 0.02503 4.13325 4.74562 0.86889 0.54372 1.09861 0.40547 + 19 1.11114 1.72950 1.24664 1.58000 32 a g - < + 1.36453 1.37249 1.40441 1.40441 + 0.02402 4.19685 4.74562 0.91728 0.51017 1.09861 0.40547 + 20 1.75962 1.21469 1.33724 1.31484 34 c c - < + 1.34930 1.39894 1.39894 1.39894 + 0.02204 4.33423 4.74562 1.03089 0.44113 1.09861 0.40547 + 21 2.10367 2.26221 0.69525 1.29127 36 g g - < + 1.23194 1.49181 1.35346 1.49181 + 0.09167 3.83584 2.71797 1.21903 0.35029 1.09861 0.40547 + 22 1.08122 2.07379 0.96621 1.86701 40 g a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01861 4.68660 4.68660 1.46634 0.26236 0.30116 1.34692 + 23 0.65069 1.92638 2.07928 1.57205 41 a A - - + 1.25691 1.56831 1.56831 1.20840 + 0.05734 3.06802 4.68691 2.01181 0.14358 0.97567 0.47311 + 24 0.77013 1.57188 2.38564 1.43817 52 a a - - + 1.40004 1.40004 1.34615 1.40004 + 0.02963 4.30295 4.15605 1.00560 0.45543 0.96359 0.48049 + 25 1.08270 1.81905 0.98134 2.08480 54 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01935 4.73865 4.56477 1.46634 0.26236 0.78701 0.60734 + 26 2.77791 0.32597 3.23107 1.73450 55 c c - < + 1.37317 1.36923 1.40162 1.40162 + 0.02306 4.26226 4.74336 0.97163 0.47557 1.03813 0.43713 
+ 27 0.59716 2.50150 1.63951 1.75105 57 a A - - + 1.68144 1.28909 1.43347 1.20433 + 0.06658 3.11247 3.91592 2.07020 0.13486 1.13318 0.38862 + 28 3.45772 0.15214 3.28331 2.62934 68 c C - - + 1.24691 1.52452 1.28843 1.51786 + 0.06417 3.05117 4.20964 0.68732 0.69901 0.60688 0.78756 + 29 2.54112 0.43659 2.14754 1.84379 71 c c - < + 1.38629 1.38629 1.38629 1.38629 + 0.02064 4.73962 4.44910 1.46634 0.26236 1.17883 0.36765 + 30 1.75760 1.28076 1.06455 1.58555 72 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01767 4.73785 4.73785 1.46634 0.26236 0.83716 0.56729 + 31 2.45757 2.10516 0.32641 2.64477 73 g G - < + 1.38629 1.61608 1.38629 1.19957 + 0.04126 3.46755 4.68520 1.40188 0.28254 1.16513 0.37379 + 32 0.68847 2.43846 1.70166 1.47850 78 a a - < + 1.39953 1.34760 1.39953 1.39953 + 0.04835 4.31604 3.38601 1.01709 0.44886 1.18379 0.36545 + 33 2.02827 2.44383 2.92174 0.31777 80 u u - < + 1.38629 1.38629 1.38629 1.38629 + 0.01803 4.71785 4.71785 1.46634 0.26236 1.72111 0.19707 + 34 3.56015 0.19950 3.88725 2.02556 81 c C - < + 1.35718 1.41983 1.23310 1.56316 + 0.05788 3.17887 4.22665 1.23421 0.34399 1.80037 0.18061 + 35 2.77743 0.33393 2.97305 1.76872 86 c C - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01812 4.71305 4.71305 1.46634 0.26236 1.40817 0.28050 + 36 3.98420 0.10391 3.40107 3.06287 87 C C - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01801 4.71875 4.71875 1.46634 0.26236 1.80037 0.18061 + 37 0.60764 2.71262 1.22750 2.34366 88 a A - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01801 4.71875 4.71875 1.46634 0.26236 1.80037 0.18061 + 38 3.23088 3.18734 3.87694 0.10705 89 u U - _ + 1.38629 1.38629 1.38629 1.38629 + 0.03159 4.71875 3.80896 1.46634 0.26236 1.74175 0.19263 + 39 2.16110 0.61914 2.26925 1.41468 90 c C - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01825 4.70596 4.70596 1.46634 0.26236 1.93795 0.15548 + 40 1.23660 0.86177 2.56859 1.55791 91 c c - _ + 1.77144 0.52247 2.28261 2.00365 + 0.48567 0.98086 4.63299 0.38856 1.13330 1.09154 0.40902 + 41 2.95007 4.22451 0.08802 4.05756 99 
G G - > + 1.38629 1.38629 1.38629 1.38629 + 0.01855 4.71928 4.66066 1.46634 0.26236 1.79035 0.18260 + 42 0.19851 2.48110 3.36603 2.78270 100 a a - > + 1.40690 1.32690 1.40690 1.40690 + 0.02561 4.11282 4.71875 0.87369 0.54027 0.56674 0.83787 + 43 0.56275 2.33321 2.68997 1.32616 102 a A - - + 1.38629 1.38629 1.38629 1.38629 + 0.01763 4.74024 4.74024 1.46634 0.26236 1.28599 0.32349 + 44 3.39375 0.12438 4.17735 2.68773 103 c C - - + 1.38629 1.38629 1.38629 1.38629 + 0.01763 4.74024 4.74024 1.46634 0.26236 0.98476 0.46765 + 45 1.56019 1.81908 2.57774 0.59460 104 u u - > + 1.38629 1.38629 1.38629 1.38629 + 0.01757 4.74330 4.74330 1.46634 0.26236 1.12516 0.39245 + 46 2.56361 0.38721 1.86761 2.41311 105 c C - > + 1.38629 1.38629 1.38629 1.38629 + 0.01756 4.74383 4.74383 1.46634 0.26236 1.16513 0.37379 + 47 1.65610 0.99848 1.23540 1.89738 106 c c - > + 1.39034 1.39806 1.42179 1.33693 + 0.03045 3.84973 4.74383 0.68205 0.70437 0.97754 0.47198 + 48 2.00597 3.06134 0.26182 3.01610 108 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.02119 4.74562 4.39977 1.46634 0.26236 1.09861 0.40547 + 49 0.58998 1.58170 2.46808 1.86247 109 a A - - + 1.46483 1.46483 1.42461 1.21347 + 0.04703 3.29086 4.74203 0.41677 1.07638 0.88179 0.53450 + 50 0.30089 2.06397 2.82571 2.60877 111 a A - - + 1.38629 1.38629 1.38629 1.38629 + 0.01853 4.74562 4.63915 1.46634 0.26236 1.09861 0.40547 + 51 2.65105 3.53770 0.14206 3.41803 112 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01755 4.74465 4.74465 1.46634 0.26236 1.13538 0.38758 + 52 2.25374 1.86995 2.96108 0.37237 113 u u - > + 1.38629 1.38629 1.38629 1.38629 + 0.02119 4.74465 4.40076 1.46634 0.26236 1.13538 0.38758 + 53 2.30451 2.06868 1.68368 0.53077 114 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.01761 4.74107 4.74107 1.46634 0.26236 1.15277 0.37945 + 54 0.07177 3.47955 4.08658 3.83347 115 A A - - + 1.38629 1.38629 1.38629 1.38629 + 0.01760 4.74205 4.74205 1.46634 0.26236 1.22683 0.34703 + 55 0.07030 3.55771 4.11216 3.77171 116 A A - - + 1.35190 1.39803 
1.39803 1.39803 + 0.02925 4.35534 4.13613 1.05273 0.42922 1.22683 0.34703 + 56 1.71014 3.01237 0.32835 2.99827 118 g G - - + 1.85597 1.60694 0.90297 1.43618 + 0.08251 2.66512 4.64522 2.63460 0.07445 0.63738 0.75221 + 57 2.17286 0.32500 2.86949 2.23591 136 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.01855 4.74481 4.63738 1.46634 0.26236 1.03982 0.43621 + 58 1.54608 1.38572 1.03143 1.71325 137 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01755 4.74464 4.74464 1.46634 0.26236 1.02829 0.44257 + 59 1.73179 1.41448 1.67883 0.93296 138 u c - > + 1.38629 1.38629 1.38629 1.38629 + 0.01820 4.74562 4.67277 1.46634 0.26236 1.09861 0.40547 + 60 1.88051 1.48253 0.89529 1.55148 139 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01802 4.74497 4.69262 1.46634 0.26236 1.05064 0.43034 + 61 1.83989 0.89343 1.57228 1.49458 140 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.02062 4.74516 4.44689 1.46634 0.26236 1.06402 0.42322 + 62 1.95525 1.32639 2.27770 0.71229 141 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.01759 4.74260 4.74260 1.46634 0.26236 0.90996 0.51507 + 63 1.17195 1.68298 2.18999 0.93522 142 u u - - + 1.42850 1.37453 1.30763 1.44006 + 0.04330 3.39069 4.74562 0.45444 1.00732 1.09861 0.40547 + 64 2.06137 2.80967 0.29508 2.68794 144 g g - > + 1.39830 1.35112 1.39830 1.39830 + 0.02181 4.35145 4.74562 1.04608 0.43281 1.09861 0.40547 + 65 2.09444 1.16363 0.78407 2.22593 146 g g - > + 1.36526 1.42987 1.32423 1.42987 + 0.02827 3.99693 4.65608 0.93847 0.49631 1.09861 0.40547 + 66 2.69864 3.10988 0.17720 2.98611 149 g g - < + 1.44804 1.49525 1.55952 1.10703 + 0.04988 3.21982 4.74481 0.88506 0.53220 1.03982 0.43621 + 67 2.75316 0.33473 2.45264 2.00485 153 c C - < + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 68 2.10133 1.01237 1.16670 1.59476 154 c c - < + 1.35433 1.34384 1.40173 1.44874 + 0.03127 3.81223 4.74562 0.88206 0.53432 1.09861 0.40547 + 69 1.06718 1.90576 1.25097 1.50920 157 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01753 
4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 70 1.17832 2.48536 0.67247 2.31807 158 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 71 1.12397 2.13330 1.34852 1.21421 159 a g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 72 1.92362 1.97652 0.76090 1.39381 160 g g - - + 1.38629 1.38629 1.38629 1.38629 + 0.19341 4.74562 1.78878 1.46634 0.26236 1.09861 0.40547 + 73 1.49538 1.94718 2.24044 0.64103 161 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.19486 4.57308 1.79140 1.46634 0.26236 3.05575 0.04823 + 74 0.45457 2.40448 2.17182 1.82635 162 a A - - + 1.25913 1.51334 1.51334 1.28825 + 0.08684 2.64599 4.40301 0.39629 1.11722 0.32782 1.27474 + 75 2.93299 3.01855 0.13952 3.57151 165 g G - - + 1.38629 1.38629 1.38629 1.38629 + 0.01888 4.67199 4.67199 1.46634 0.26236 0.20256 1.69628 + 76 3.99358 2.92642 4.19781 0.09107 166 U U - < + 1.38629 1.38629 1.38629 1.38629 + 0.01754 4.74521 4.74521 1.46634 0.26236 1.11450 0.39761 + 77 0.11617 3.11470 3.66144 3.22913 167 a A - - + 1.38629 1.38629 1.38629 1.38629 + 0.03608 4.74521 3.62152 1.46634 0.26236 1.11450 0.39761 + 78 2.84125 0.35510 1.85871 2.46899 168 c c - < + 1.38629 1.38629 1.38629 1.38629 + 0.01843 4.72699 4.66629 1.46634 0.26236 1.63398 0.21710 + 79 3.31819 2.37733 3.05633 0.19368 169 u u - < + 1.38629 1.38629 1.38629 1.38629 + 0.02382 4.72644 4.22112 1.46634 0.26236 1.34190 0.30293 + 80 1.03409 2.18770 1.15649 1.52470 170 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01792 4.72376 4.72376 1.46634 0.26236 1.06120 0.42471 + 81 1.86872 1.58634 0.86303 1.51811 171 g g - - + 1.38629 1.38629 1.38629 1.38629 + 0.02599 4.73172 4.08388 1.46634 0.26236 1.33266 0.30622 + 82 1.78916 2.52942 0.37772 2.69164 172 g G - - + 1.38629 1.38629 1.38629 1.38629 + 0.14347 4.72552 2.08118 1.46634 0.26236 1.35133 0.29961 + 83 0.89061 2.00611 1.13404 2.01474 173 a a - - + 1.34036 1.40209 1.40209 1.40209 + 0.16534 4.10702 1.99558 1.13868 
0.38602 1.47891 0.25862 + 84 2.50495 2.45089 2.96402 0.24783 176 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.04231 4.50624 3.49387 1.46634 0.26236 0.12430 2.14660 + 85 1.96114 2.03452 0.46600 2.29211 177 g G - < + 1.38629 1.38629 1.38629 1.38629 + 0.01800 4.71951 4.71951 1.46634 0.26236 1.78612 0.18345 + 86 1.96207 1.76997 0.59462 1.98542 178 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.06564 4.71951 2.90743 1.46634 0.26236 1.68897 0.20421 + 87 2.48356 3.14584 0.23004 2.53801 179 g G - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01885 4.67369 4.67369 1.46634 0.26236 1.45718 0.26513 + 88 1.55291 1.80348 1.69102 0.82254 180 u u - _ + 1.38629 1.38629 1.38629 1.38629 + 0.02185 4.68814 4.38920 1.46634 0.26236 0.91915 0.50892 + 89 1.98709 2.38814 0.43269 2.10089 181 g G - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01940 4.71010 4.58486 1.46634 0.26236 1.66421 0.20991 + 90 0.48856 2.23554 2.05100 1.89081 182 a A - _ + 1.38629 1.38629 1.38629 1.38629 + 0.04725 4.71185 3.29255 1.46634 0.26236 0.71674 0.67010 + 91 2.08360 0.69276 1.61378 1.73617 183 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.02481 4.20562 4.64669 0.95696 0.48460 1.07845 0.41570 + 92 2.58338 0.49017 1.71456 2.02558 185 c C - > + 1.41519 1.37531 1.38981 1.36557 + 0.05835 3.94245 3.28929 0.75530 0.63463 1.29897 0.31857 + 93 1.44648 1.55255 1.47019 1.13006 187 u u - - + 1.38629 1.38629 1.38629 1.38629 + 0.02238 4.69570 4.34314 1.46634 0.26236 0.91028 0.51486 + 94 2.44101 0.55901 2.21621 1.46046 188 c C - - + 1.38629 1.38629 1.38629 1.38629 + 0.01922 4.71416 4.59873 1.46634 0.26236 1.56388 0.23486 + 95 1.61020 0.94090 1.43457 1.76225 189 c c - - + 1.38629 1.38629 1.38629 1.38629 + 0.03993 4.71641 3.50002 1.46634 0.26236 1.58461 0.22945 + 96 2.18875 1.48910 2.33334 0.57023 190 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.03318 4.69762 3.74983 1.46634 0.26236 1.94079 0.15501 + 97 2.22832 2.76588 0.27337 2.68011 191 g G - - + 1.39934 1.39934 1.39934 1.34815 + 0.03844 4.26291 3.74534 1.02156 0.44634 0.26192 1.46781 + 
98 2.23154 2.13562 0.42666 2.10555 193 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01781 4.72977 4.72977 1.46634 0.26236 1.57070 0.23307 + 99 2.25826 3.31778 0.19553 3.30137 194 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01781 4.72977 4.72977 1.46634 0.26236 0.55476 0.85381 + 100 0.12870 3.11387 3.74780 2.94195 195 a A - - + 1.49330 1.35303 1.35303 1.35303 + 0.03095 3.82658 4.74521 1.21784 0.35079 1.11450 0.39761 + 101 0.52168 2.77619 1.31025 2.59762 199 a A - > + 1.42051 1.37646 1.33049 1.42051 + 0.50426 3.27205 1.02688 0.50867 0.91954 1.11450 0.39761 + 102 1.75707 1.65310 1.00247 1.31295 202 g g - - + 1.38629 1.38629 1.38629 1.38629 + 0.02709 4.31530 4.31530 1.46634 0.26236 0.06230 2.80676 + 103 0.98530 1.54075 2.13641 1.22290 203 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01763 4.74029 4.74029 1.46634 0.26236 0.80681 0.59109 + 104 1.91716 0.92422 1.32032 1.66554 204 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 105 2.25576 0.81533 2.15099 1.08962 205 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.03272 4.74562 3.75059 1.46634 0.26236 1.09861 0.40547 + 106 1.07692 1.79642 1.75375 1.13836 206 a a - - + 1.47073 1.41139 1.36911 1.30156 + 0.05759 3.16375 4.29051 0.98783 0.46583 0.55355 0.85544 + 107 1.77856 1.36913 0.90724 1.75358 211 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01762 4.74081 4.74081 1.46634 0.26236 1.26779 0.33052 + 108 2.13110 2.71915 0.28126 2.80460 212 g G - > + 1.37506 1.46572 1.39000 1.31980 + 0.04786 3.28070 4.69626 0.41342 1.08289 1.26779 0.33052 + 109 1.62498 2.09339 1.94510 0.62205 214 u u - > + 1.67585 0.77934 1.88169 1.60043 + 0.48638 0.97901 4.65939 0.06094 2.82815 0.84034 0.56486 + 110 1.68495 3.43968 0.29793 3.21597 217 g g - ) + 1.40521 1.33160 1.40521 1.40521 + 0.03979 4.17697 3.74368 0.90276 0.51995 1.14102 0.38492 + 111 2.20670 1.02757 2.06911 0.90199 219 u c - ) + 1.38629 1.38629 1.38629 1.38629 + 0.03044 4.72941 3.85623 1.46634 0.26236 0.89025 0.52857 + 112 2.56171 
1.14811 2.54620 0.64015 220 u c - ) + 1.22072 1.44819 1.44819 1.44819 + 0.03025 3.90293 4.64488 1.01008 0.45286 1.57949 0.23077 + 113 2.42825 3.31342 0.17401 3.34863 223 g g - ) + 1.37346 1.36856 1.40182 1.40182 + 0.02545 4.23975 4.53557 0.96752 0.47807 0.78017 0.61309 + 114 2.28462 0.62468 2.31949 1.33017 225 c C - ) + 1.38629 1.38629 1.38629 1.38629 + 0.01814 4.73640 4.68744 1.46634 0.26236 1.26908 0.33002 + 115 0.95503 1.17607 1.96286 1.79422 226 a a - ) + 1.38629 1.38629 1.38629 1.38629 + 0.02255 4.73719 4.30270 1.46634 0.26236 1.31072 0.31420 + 116 1.24426 1.94348 1.35080 1.17245 227 u g - ) + 1.38629 1.38629 1.38629 1.38629 + 0.03289 4.73302 3.74836 1.46634 0.26236 1.49088 0.25512 + 117 1.61067 1.83596 1.13930 1.13712 228 u g - ) + 1.38629 1.38629 1.38629 1.38629 + 0.03308 4.71816 3.74632 1.46634 0.26236 1.68263 0.20565 + 118 1.74114 0.93694 1.54031 1.52079 229 c c - ) + 1.38629 1.38629 1.38629 1.38629 + 0.10437 4.70462 2.40738 1.46634 0.26236 2.03123 0.14061 + 119 1.80143 0.85215 2.05176 1.27320 230 c c - : + 1.38629 1.38629 1.38629 1.38629 + 0.01000 4.61025 * 1.46634 0.26236 0.00000 * +// \ No newline at end of file diff --git a/q2_types/hmmer/tests/data/hmms/4_amino.hmm b/q2_types/hmmer/tests/data/hmms/4_amino.hmm new file mode 100644 index 00000000..54c7aee3 --- /dev/null +++ b/q2_types/hmmer/tests/data/hmms/4_amino.hmm @@ -0,0 +1,199 @@ +HMMER3/f [3.1b2 | February 2015] +NAME COG1413.faa.final_tree.fa +LENG 5 +ALPH amino +RF no +MM no +CONS yes +CS no +MAP yes +DATE Fri Sep 28 16:11:42 2018 +NSEQ 9184 +EFFN 9184.000000 +CKSUM 4188890012 +STATS LOCAL MSV -7.8758 1.53292 +STATS LOCAL VITERBI -9.2150 1.53292 +STATS LOCAL FORWARD -1.5202 1.53292 +HMM A C D E F G H I K L M N P Q R S T V W Y + m->m m->i m->d i->m i->i d->m d->d + COMPO 2.15961 4.83096 2.77614 2.66643 3.50057 2.79412 3.93266 3.01265 3.03101 2.13764 4.11997 3.37590 2.99384 3.18466 2.69554 2.85142 3.03638 2.77312 4.20758 3.64868 + 2.22461 4.68737 2.78288 2.68609 3.36090 2.76898 3.86143 3.03984 
3.09492 2.14663 4.01338 3.37541 2.96650 3.17989 2.72654 2.84544 3.01045 2.78385 4.15025 3.55881 + 8.73029 0.00239 6.10854 5.69023 0.00338 0.00000 * + 1 1.66767 4.71954 3.08045 2.87247 3.65571 2.26664 3.96771 3.19781 3.23087 2.26570 4.51338 3.75729 3.42258 3.36819 2.77826 2.88913 2.97233 2.65924 4.72973 4.00800 5552 a - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.00571 12.65184 5.16945 0.61958 0.77255 0.04657 3.09005 + 2 2.31156 5.04536 3.24595 3.10731 3.13564 3.46905 4.11287 2.66796 3.74456 1.11103 3.61586 3.81646 3.21608 3.67606 3.55457 3.32566 3.44135 2.79225 4.73964 4.09590 5553 l - - - + 2.11512 4.85437 2.73686 2.66830 3.57135 2.79738 3.88428 3.06039 3.14725 2.13746 4.16816 3.44640 2.97082 3.19489 2.64650 2.85141 3.05782 2.74390 4.18075 3.68214 + 7.05252 0.00117 8.11295 4.96035 0.00704 4.03374 0.01787 + 3 1.09776 4.41072 3.43804 3.12910 3.67283 3.24355 4.26962 3.06573 3.50913 2.39573 4.44403 3.54797 3.12238 3.85454 3.00890 3.10175 3.29924 2.61831 4.53614 4.45233 7323 a - - - + 2.51762 4.44879 2.78132 2.80296 3.26580 2.56005 3.57238 3.18520 2.78218 2.48137 4.08040 2.89413 2.89714 3.32910 2.94074 2.49537 2.81032 2.78129 4.68396 3.70182 + 0.66765 3.64611 0.77437 2.25260 0.11107 2.24263 0.11225 + 4 1.27089 4.11207 4.48177 3.67245 3.38524 4.31027 4.80205 2.11097 4.37564 1.49386 3.77536 5.01647 5.09441 4.35276 4.24417 4.13032 3.83956 2.05986 5.86496 3.82566 7345 a - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.00001 11.96592 12.68827 0.61958 0.77255 0.01113 4.50333 + 5 2.27563 4.48763 2.63898 2.21860 3.77175 3.23037 4.08698 3.08710 3.06519 2.13574 4.28409 3.30047 3.45142 3.04825 2.61723 2.79292 2.97656 2.96078 3.65679 3.75327 7346 l - - - + 2.14140 4.92283 2.78072 2.65305 3.57236 2.80660 3.99033 2.98567 2.96282 2.13610 
4.17486 3.35504 3.01407 3.18202 2.68913 2.85327 3.04458 2.77653 4.24923 3.69308 + 9.23040 0.00010 * 6.21894 0.00199 0.00000 * +// +HMMER3/f [3.1b2 | February 2015] +NAME COG3637.faa.final_tree.fa +LENG 6 +ALPH amino +RF no +MM no +CONS yes +CS no +MAP yes +DATE Fri Aug 3 19:48:44 2018 +NSEQ 12031 +EFFN 12031.000000 +CKSUM 3862774235 +STATS LOCAL MSV -7.5078 1.34307 +STATS LOCAL VITERBI -8.9538 1.34307 +STATS LOCAL FORWARD -2.4232 1.34307 +HMM A C D E F G H I K L M N P Q R S T V W Y + m->m m->i m->d i->m i->i d->m d->d + COMPO 2.30472 5.72662 2.65025 3.17462 3.20800 2.29735 4.46594 3.03091 3.63654 2.51278 4.43257 2.94238 3.14849 3.42439 3.29814 2.46145 2.41695 2.55621 4.45594 3.33477 + 2.26431 6.00420 2.59661 3.22760 3.34630 2.30257 4.61214 3.03864 3.96129 2.54135 4.65831 2.92972 3.17590 3.46695 3.43775 2.37978 2.26145 2.48727 4.64141 3.47195 + 5.60897 0.00368 11.90158 7.18477 0.00076 0.00000 * + 1 1.76775 5.53582 3.58050 4.47646 3.49684 3.42628 4.15773 2.99182 4.49770 2.18475 3.71798 3.46923 3.23105 3.92791 3.36807 1.86707 3.00401 2.52752 5.60629 2.33252 14496 a - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.04406 11.67918 3.14426 0.61958 0.77255 0.48576 0.95510 + 2 2.32791 5.91131 2.64612 2.57686 3.97472 2.65364 5.71748 2.90390 2.93815 2.61575 4.32097 3.84498 3.47107 2.37258 2.87298 2.20219 2.70877 2.62512 7.14999 4.29931 14497 s - - - + 2.37340 5.36387 2.71259 3.13142 3.05350 2.30070 4.34034 3.01475 3.29843 2.48594 4.17390 2.92205 3.10451 3.37771 3.23286 2.56986 2.61999 2.66876 4.25870 3.16979 + 2.96171 0.05312 12.36695 6.66031 0.00128 5.06753 0.00632 + 3 2.24405 6.39712 4.33363 3.74889 2.38294 2.45884 6.02123 3.05321 3.33143 2.09391 4.70965 3.96642 3.06806 4.54790 3.08402 2.97533 3.85379 2.71908 3.71078 1.73841 20929 y - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 
2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.56216 11.97804 0.84392 0.61958 0.77255 5.06753 0.00632 + 4 2.66742 10.10073 3.44052 3.15809 4.14530 2.44298 4.24196 3.29279 3.19376 3.46431 4.11702 2.90857 3.90727 2.39744 1.58974 2.37960 2.57033 3.13272 5.04854 3.71296 20930 r - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.02062 11.41543 3.89242 0.61958 0.77255 4.75595 0.00864 + 5 2.65103 6.21850 4.59602 4.11592 1.93184 2.99510 2.98199 3.88963 3.89661 2.14837 4.53432 4.27931 3.88549 3.41792 4.49224 2.84627 3.30755 2.88616 3.07750 1.54951 20931 y - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.00002 11.37781 12.10016 0.61958 0.77255 0.47375 0.97461 + 6 2.52768 5.79965 2.50208 3.16115 3.35841 2.74299 4.00827 3.48943 2.66294 3.03487 3.82871 2.57695 3.40220 3.41742 2.53009 2.53664 2.06544 3.22062 4.19160 3.78437 20932 t - - - + 2.33118 5.69203 2.75236 3.05157 3.04596 2.25946 4.21090 3.03382 3.39639 2.45342 4.25659 3.05273 3.13205 3.35908 2.94652 2.61143 2.79499 2.61946 4.24444 3.22087 + 8.44071 0.00022 * 5.84129 0.00291 0.00000 * +// +HMMER3/f [3.1b2 | February 2015] +NAME COG0497.faa.final_tree.fa +LENG 11 +ALPH amino +RF no +MM no +CONS yes +CS no +MAP yes +DATE Fri Aug 3 19:56:36 2018 +NSEQ 5526 +EFFN 5526.000000 +CKSUM 3787118146 +STATS LOCAL MSV -8.9984 1.24534 +STATS LOCAL VITERBI -12.0136 1.24534 +STATS LOCAL FORWARD -3.4697 1.24534 +HMM A C D E F G H I K L M N P Q R S T V W Y + m->m m->i m->d i->m i->i d->m d->d + COMPO 2.38131 5.10641 2.75256 2.53953 3.32288 2.76773 4.05138 2.91305 2.85809 2.24220 4.08945 3.08958 3.30555 3.01642 2.82571 2.73616 2.92908 2.85388 4.48576 3.49539 + 2.42976 4.85209 2.73943 2.69405 3.13608 2.72166 3.95490 2.79186 2.95482 2.18843 3.88863 3.06731 3.25765 3.25365 2.92083 2.72352 2.94071 
2.81638 4.52773 3.47604 + 2.01375 0.28821 2.14636 4.95328 0.00709 0.00000 * + 1 2.44030 4.70713 3.09980 2.25978 4.14273 2.58827 3.50871 3.23257 3.01096 2.96616 2.33495 2.65228 4.73977 3.21831 1.91461 2.53493 3.34881 5.24301 6.29148 4.79903 1196 r - - - + 2.46185 4.97124 2.79976 2.56216 3.23038 2.74013 4.03908 2.76227 2.81831 2.19244 4.01846 3.04493 3.41203 3.10387 2.96559 2.71333 2.91054 2.82729 4.70808 3.51685 + 5.51869 0.03357 3.54033 4.84826 0.00787 5.77768 0.00310 + 2 2.52693 5.25978 2.59273 1.92583 4.70287 2.84515 5.06306 3.68497 2.21757 2.26895 5.31209 2.96573 3.48859 2.38123 3.85551 2.46359 3.02073 3.55755 6.29867 3.98584 2163 e - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.59532 11.88419 0.80161 0.61958 0.77255 6.06405 0.00233 + 3 2.00951 6.60927 2.98515 2.05550 4.13057 3.97299 4.68133 2.91357 2.87252 2.08743 4.42433 2.65165 3.79650 3.32330 3.21755 2.59182 3.10243 3.15694 7.82996 2.76134 2164 a - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.00002 11.28895 12.01129 0.61958 0.77255 0.18639 1.77165 + 4 2.91402 4.64140 3.89618 2.96395 2.98843 3.60685 3.93817 2.25463 2.67861 1.45578 2.77743 3.86743 4.46450 2.89721 3.06713 3.02579 3.31504 2.99416 4.68449 3.91872 2165 l - - - + 2.32359 5.12598 2.82245 2.36657 3.41800 3.08769 4.17909 2.94470 2.65878 2.16860 4.31250 3.07462 3.48628 2.84758 2.81823 2.76235 2.95849 2.95407 4.70177 3.57467 + 5.56919 0.00382 12.61112 4.62500 0.00985 0.39046 1.12931 + 5 2.49587 5.78205 2.93011 2.30431 4.46509 3.04313 4.63540 2.91069 2.49748 2.15275 2.89121 3.63653 3.00737 2.37103 2.95046 3.07891 2.93762 3.01279 6.42765 4.36280 2919 l - - - + 2.41565 5.11266 2.63088 2.32394 3.42744 2.99918 4.03433 2.89553 2.74089 2.17929 4.20335 3.04822 3.59473 2.90837 2.85189 2.76854 3.05105 2.96062 4.57614 
3.59111 + 4.63863 0.05980 3.02880 4.97048 0.00696 0.94885 0.48969 + 6 2.08370 6.27337 2.43491 2.61225 3.27263 3.23904 4.87381 2.73183 2.61377 2.10665 4.50926 3.20299 3.59142 2.75789 3.00867 2.65432 3.67490 3.29581 4.15024 3.59610 3846 a - - - + 2.41073 5.14665 2.78063 2.54229 3.36999 2.75215 3.99616 2.88343 2.77225 2.29270 3.99652 3.05959 3.28108 2.97321 2.87925 2.71334 2.90280 2.81656 4.65313 3.61175 + 8.44651 0.03192 3.46734 5.34138 0.00480 5.75271 0.00318 + 7 2.43153 4.68520 2.99225 2.05361 3.52968 3.14415 3.44259 3.94950 2.71188 2.59535 3.79701 3.23263 2.91364 2.53810 2.80469 2.83291 3.17089 2.63670 4.28149 4.02922 5198 e - - - + 2.80770 4.55380 2.90002 2.82129 3.38935 2.54998 3.90262 3.04669 2.79697 2.05386 4.17388 2.96404 2.91507 3.21340 2.86866 2.54969 2.92361 2.81804 4.66525 3.62520 + 0.96309 0.60825 2.60386 0.00007 9.58892 0.20721 1.67583 + 8 2.97388 6.65095 2.83103 2.78047 2.39300 2.94817 5.06486 2.60908 3.49036 1.94784 3.93019 3.77420 3.00718 3.28093 3.47080 2.27978 3.20480 2.91331 3.64817 3.08036 5200 l - - - + 2.47544 5.16604 2.78609 2.50260 3.18842 2.81473 3.97640 2.86915 2.79685 2.31632 4.08336 2.92494 3.38872 3.02383 2.91220 2.72420 2.84257 2.92662 4.57041 3.34416 + 4.12495 0.01758 6.67809 4.62831 0.00982 5.93323 0.00265 + 9 2.92026 4.44257 3.40556 3.48545 2.63040 3.23176 5.57575 2.99483 3.10889 1.93434 3.25653 3.22319 3.48410 3.02714 3.13502 2.53712 3.24280 2.33973 5.22584 2.35299 5782 l - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.43899 11.66975 1.03478 0.61958 0.77255 5.94655 0.00262 + 10 2.45554 5.39094 3.69955 3.58870 2.52392 6.85576 5.15725 2.76872 4.50525 1.17902 2.80744 4.76357 3.27265 4.34321 3.34627 2.62748 3.97528 2.22933 5.27194 3.50168 5783 l - - - + 2.62950 4.57355 2.83691 2.84607 3.62367 2.53528 3.88512 3.17230 2.66069 2.52552 4.10479 2.71254 2.86349 3.09985 2.87970 2.39459 2.87533 2.80009 4.84621 3.68153 + 
1.30545 0.31616 11.95313 0.66989 0.71696 0.08975 2.45524 + 11 2.45896 4.87599 2.95494 2.74995 2.18351 3.26869 5.15295 2.09630 2.73626 2.50371 3.02454 3.70759 5.61325 2.89004 3.49806 2.76140 2.87590 2.84010 5.63602 3.87783 5786 i - - - + 2.32966 5.17019 2.74665 2.60831 3.35899 2.67861 4.08831 2.99667 2.97000 2.26799 4.15045 3.15977 3.19999 3.03545 2.74212 2.74385 2.91792 2.82541 4.31803 3.44851 + 5.28450 0.00508 * 6.29494 0.00185 0.00000 * +// +HMMER3/f [3.1b2 | February 2015] +NAME COG2247.faa.final_tree.fa +LENG 15 +ALPH amino +RF no +MM no +CONS yes +CS no +MAP yes +DATE Fri Aug 3 19:30:05 2018 +NSEQ 1885 +EFFN 1885.000000 +CKSUM 1780862598 +STATS LOCAL MSV -8.6248 0.89121 +STATS LOCAL VITERBI -10.0295 0.89121 +STATS LOCAL FORWARD -3.7989 0.89121 +HMM A C D E F G H I K L M N P Q R S T V W Y + m->m m->i m->d i->m i->i d->m d->d + COMPO 2.44915 5.35032 2.71121 2.87542 3.46498 2.46886 4.56713 2.84104 2.69299 2.70644 4.29834 2.77543 3.20376 3.54606 3.59569 2.58148 2.42502 2.55969 4.53409 3.17317 + 2.52100 4.94117 2.71334 2.82310 3.33345 2.48691 4.47207 2.84734 2.71010 2.71266 4.11858 2.72552 3.26108 3.55594 3.59929 2.57465 2.46698 2.64206 4.39050 3.10254 + 6.94031 0.03373 3.43592 6.10372 0.00224 0.00000 * + 1 2.43061 6.52439 2.89802 2.69753 3.60187 3.15781 4.96600 3.02726 2.26414 3.02345 4.19670 2.80793 2.73267 3.62853 3.62439 2.35586 2.28930 2.49488 6.62679 3.30602 6106 k - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.48275 10.68158 0.96001 0.61958 0.77255 3.43217 0.03285 + 2 2.29864 5.37983 2.51938 2.84317 1.91465 3.13796 5.59390 3.28933 2.73182 2.96960 4.05621 2.90842 3.31072 3.93153 4.00388 2.95394 2.80165 2.45659 10.11653 2.76111 6107 f - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.00006 10.19889 10.92124 0.61958 
0.77255 0.09354 2.41573 + 3 2.17410 4.84595 2.90754 2.72432 4.68798 2.77258 5.69908 3.08962 2.90217 3.32801 3.58714 3.05656 4.07767 3.60106 3.82694 2.88029 1.75701 2.38233 4.00413 2.70473 6108 t - - - + 2.46037 5.20757 2.72168 2.91186 3.49378 2.42990 4.64906 2.81215 2.68323 2.69362 4.36625 2.71725 3.28893 3.58573 3.61924 2.53384 2.45307 2.54531 4.61538 3.20326 + 5.60555 0.00587 6.13149 5.71070 0.00332 3.90691 0.02031 + 4 2.12275 4.79663 2.87783 3.07210 4.29005 2.09936 4.56133 3.27550 2.56420 2.85847 4.22507 3.02677 3.63132 3.71087 3.54265 2.41910 2.39579 2.43932 5.91842 3.27928 8910 g - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.63887 10.76370 0.75058 0.61958 0.77255 3.96294 0.01919 + 5 2.64344 5.21746 2.85540 2.75822 4.08998 1.64616 4.26571 4.35644 2.37010 3.14547 7.11155 2.24956 3.66227 3.49529 3.06344 2.30934 2.90887 3.19326 5.59093 3.81774 8911 g - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.04256 10.12056 3.17902 0.61958 0.77255 2.61204 0.07622 + 6 3.63369 5.17021 3.86440 4.28210 2.09636 3.50098 8.45709 3.69188 3.99534 3.44984 4.80314 2.74745 5.88297 3.90380 4.26743 4.09834 3.35283 3.94832 0.80907 2.53400 8912 w - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.02032 10.14924 3.90840 0.61958 0.77255 2.27910 0.10800 + 7 2.52151 5.92012 2.68873 2.81215 4.09220 2.89742 5.27314 3.17859 2.89818 2.73789 4.83556 2.60549 3.18197 3.06081 3.40504 2.04235 2.10077 2.90021 5.06420 3.39088 8913 s - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.05366 10.21557 2.95241 0.61958 0.77255 
2.72071 0.06809 + 8 2.39347 5.52169 3.12658 3.55031 3.06849 2.46790 5.49508 2.82007 2.50113 2.51341 3.83244 2.78515 3.95000 3.50827 4.37407 2.57111 2.25728 2.83285 3.49699 2.83430 8914 t - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.00274 10.20953 5.91609 0.61958 0.77255 0.19996 1.70793 + 9 2.66268 4.92394 2.55293 2.96122 4.18225 2.21914 3.91292 3.29789 2.55556 2.95694 4.89475 2.43521 3.74705 3.30198 3.55323 2.22277 2.44039 3.03045 5.17010 3.00953 8915 g - - - + 2.51035 5.38923 2.67130 2.80403 3.47605 2.49367 4.40952 2.89307 2.63655 2.73143 4.32672 2.77953 3.11413 3.47410 3.62964 2.56713 2.44121 2.58077 4.82114 3.17402 + 4.45914 0.01165 11.42963 4.25413 0.01431 0.27548 1.42383 + 10 2.87844 6.13511 2.83904 3.29960 2.77943 2.84341 4.87921 3.29798 3.11228 2.93260 4.51049 2.80226 3.59361 3.64548 4.33080 2.95609 3.02447 3.18327 1.45344 2.66840 9502 w - - - + 2.47354 5.53653 2.66020 2.89126 3.43716 2.45532 4.48806 2.89766 2.72803 2.74202 4.35534 2.76489 3.18681 3.53367 3.64292 2.60646 2.37136 2.54788 4.51917 3.13808 + 6.30620 0.02607 3.73345 5.36852 0.00467 2.67961 0.07106 + 11 2.54575 5.52988 2.74911 2.72361 3.46855 2.61017 3.71474 3.16017 2.46215 2.90921 4.24126 3.11295 3.99783 3.96546 3.36913 2.66990 2.37400 2.71859 4.34893 2.25461 11547 y - - - + 2.63694 4.43184 2.76392 2.73833 3.47139 2.40420 3.75718 3.29985 2.69878 2.72065 4.22238 2.88894 2.75726 3.21027 2.93295 2.36805 2.70916 2.99250 4.63142 3.62959 + 3.34147 0.67329 0.78837 0.00022 8.40095 2.57883 0.07889 + 12 3.04670 4.76349 3.25799 3.66198 1.57040 2.78472 4.23260 2.68002 3.39496 2.53464 4.03748 3.58880 4.54160 3.56727 4.16059 3.12286 2.46607 2.57600 3.33143 2.60836 11549 f - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.08264 10.03910 2.53487 0.61958 0.77255 
3.01077 0.05051 + 13 2.14276 9.51285 2.38194 2.65461 4.28432 2.85919 5.24598 3.29577 2.54179 2.83050 4.05873 3.14987 4.87649 3.11565 3.00288 2.90140 1.94869 2.74349 4.53253 3.36587 11550 t - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.07025 10.00277 2.69129 0.61958 0.77255 2.33163 0.10218 + 14 3.00910 9.54227 2.78636 3.08362 5.84035 1.17700 4.10947 3.02617 2.68203 3.49043 4.32885 3.14393 4.10945 3.46313 4.63063 2.30589 2.71431 2.73949 9.97531 3.73596 11551 g - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.00007 10.02880 10.75115 0.61958 0.77255 0.24716 1.51877 + 15 3.36884 6.47222 3.37283 3.32938 2.93494 3.04103 4.77977 2.98733 3.10146 2.45674 4.17855 3.20130 3.87104 4.13634 4.57927 2.91620 3.00833 2.89599 1.31160 2.62799 11552 w - - - + 2.40141 5.64579 2.72022 2.89053 3.53929 2.47459 4.62356 2.82757 2.68397 2.69561 4.36111 2.82231 3.15718 3.53649 3.57237 2.59607 2.40753 2.52521 4.64653 3.21264 + 5.45881 0.00427 * 6.89781 0.00101 0.00000 * +// diff --git a/q2_types/hmmer/tests/data/hmms/amino_dna.hmm b/q2_types/hmmer/tests/data/hmms/amino_dna.hmm new file mode 100644 index 00000000..a40f43dd --- /dev/null +++ b/q2_types/hmmer/tests/data/hmms/amino_dna.hmm @@ -0,0 +1,302 @@ +HMMER3/f [3.1b2 | February 2015] +NAME COG1413.faa.final_tree.fa +LENG 5 +ALPH amino +RF no +MM no +CONS yes +CS no +MAP yes +DATE Fri Sep 28 16:11:42 2018 +NSEQ 9184 +EFFN 9184.000000 +CKSUM 4188890012 +STATS LOCAL MSV -7.8758 1.53292 +STATS LOCAL VITERBI -9.2150 1.53292 +STATS LOCAL FORWARD -1.5202 1.53292 +HMM A C D E F G H I K L M N P Q R S T V W Y + m->m m->i m->d i->m i->i d->m d->d + COMPO 2.15961 4.83096 2.77614 2.66643 3.50057 2.79412 3.93266 3.01265 3.03101 2.13764 4.11997 3.37590 2.99384 3.18466 2.69554 2.85142 3.03638 2.77312 4.20758 
3.64868 + 2.22461 4.68737 2.78288 2.68609 3.36090 2.76898 3.86143 3.03984 3.09492 2.14663 4.01338 3.37541 2.96650 3.17989 2.72654 2.84544 3.01045 2.78385 4.15025 3.55881 + 8.73029 0.00239 6.10854 5.69023 0.00338 0.00000 * + 1 1.66767 4.71954 3.08045 2.87247 3.65571 2.26664 3.96771 3.19781 3.23087 2.26570 4.51338 3.75729 3.42258 3.36819 2.77826 2.88913 2.97233 2.65924 4.72973 4.00800 5552 a - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.00571 12.65184 5.16945 0.61958 0.77255 0.04657 3.09005 + 2 2.31156 5.04536 3.24595 3.10731 3.13564 3.46905 4.11287 2.66796 3.74456 1.11103 3.61586 3.81646 3.21608 3.67606 3.55457 3.32566 3.44135 2.79225 4.73964 4.09590 5553 l - - - + 2.11512 4.85437 2.73686 2.66830 3.57135 2.79738 3.88428 3.06039 3.14725 2.13746 4.16816 3.44640 2.97082 3.19489 2.64650 2.85141 3.05782 2.74390 4.18075 3.68214 + 7.05252 0.00117 8.11295 4.96035 0.00704 4.03374 0.01787 + 3 1.09776 4.41072 3.43804 3.12910 3.67283 3.24355 4.26962 3.06573 3.50913 2.39573 4.44403 3.54797 3.12238 3.85454 3.00890 3.10175 3.29924 2.61831 4.53614 4.45233 7323 a - - - + 2.51762 4.44879 2.78132 2.80296 3.26580 2.56005 3.57238 3.18520 2.78218 2.48137 4.08040 2.89413 2.89714 3.32910 2.94074 2.49537 2.81032 2.78129 4.68396 3.70182 + 0.66765 3.64611 0.77437 2.25260 0.11107 2.24263 0.11225 + 4 1.27089 4.11207 4.48177 3.67245 3.38524 4.31027 4.80205 2.11097 4.37564 1.49386 3.77536 5.01647 5.09441 4.35276 4.24417 4.13032 3.83956 2.05986 5.86496 3.82566 7345 a - - - + 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503 + 0.00001 11.96592 12.68827 0.61958 0.77255 0.01113 4.50333 + 5 2.27563 4.48763 2.63898 2.21860 3.77175 3.23037 4.08698 3.08710 3.06519 2.13574 4.28409 3.30047 3.45142 3.04825 2.61723 2.79292 2.97656 2.96078 3.65679 3.75327 7346 l - - - + 2.14140 
4.92283 2.78072 2.65305 3.57236 2.80660 3.99033 2.98567 2.96282 2.13610 4.17486 3.35504 3.01407 3.18202 2.68913 2.85327 3.04458 2.77653 4.24923 3.69308 + 9.23040 0.00010 * 6.21894 0.00199 0.00000 * +// +HMMER3/f [3.1 | February 2013] +NAME MADE1 +ACC DF0000629.2 +DESC MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon +LENG 80 +MAXL 426 +ALPH DNA +RF yes +MM no +CONS yes +CS no +MAP yes +DATE Tue Feb 19 20:33:41 2013 +NSEQ 1997 +EFFN 3.911818 +CKSUM 3015610723 +STATS LOCAL MSV -8.5786 0.71858 +STATS LOCAL VITERBI -9.3632 0.71858 +STATS LOCAL FORWARD -3.4823 0.71858 +HMM A C G T + m->m m->i m->d i->m i->i d->m d->d + COMPO 1.24257 1.59430 1.62906 1.16413 + 1.38629 1.38629 1.38629 1.38629 + 0.03960 3.94183 3.94183 1.46634 0.26236 0.00000 * + 1 2.69765 2.44396 2.81521 0.24089 1 t x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03960 3.94183 3.94183 1.46634 0.26236 1.09861 0.40547 + 2 2.72939 2.37873 2.85832 0.24244 2 t x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03725 4.00179 4.00179 1.46634 0.26236 1.09861 0.40547 + 3 0.16099 3.16370 2.87328 2.99734 3 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03604 4.03416 4.03416 1.46634 0.26236 1.09861 0.40547 + 4 1.98862 2.42132 0.42649 2.10770 4 g x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03539 4.05203 4.05203 1.46634 0.26236 1.09861 0.40547 + 5 1.96369 2.69532 0.36534 2.32099 5 g x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03764 4.06427 3.92372 1.46634 0.26236 1.09861 0.40547 + 6 2.56994 2.11239 2.71946 0.30571 6 t x - - + 1.37159 1.41129 1.39124 1.37159 + 0.03806 3.89715 4.07214 1.50442 0.25122 1.00714 0.45454 + 7 2.58388 2.10353 2.64646 0.31253 12 t x - - + 1.38764 1.38524 1.38764 1.38465 + 0.03494 4.03864 4.09125 1.40070 0.28293 1.09237 0.40860 + 8 2.18552 2.70201 0.28821 2.64645 14 g x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03628 4.09157 3.96779 1.46634 0.26236 1.09861 0.40547 + 9 2.16916 2.82142 0.28427 2.60854 15 g x - - + 1.38091 1.39033 1.38365 1.39033 + 0.03566 4.00237 4.08886 1.38021 0.28972 1.01958 
0.44745 + 10 2.45517 2.15232 2.42886 0.34277 18 t x - - + 1.39065 1.39065 1.39065 1.37335 + 0.03536 4.01212 4.09576 1.39554 0.28462 1.09775 0.40589 + 11 2.10260 2.95484 0.28160 2.64222 21 g x - - + 1.36740 1.40555 1.40555 1.36740 + 0.03843 3.92069 4.02468 1.44733 0.26814 1.09856 0.40549 + 12 2.54740 0.30185 2.61355 2.21647 26 c x - - + 1.38748 1.38276 1.38748 1.38748 + 0.03457 4.05446 4.09623 1.40847 0.28040 1.05496 0.42803 + 13 0.28443 2.72003 2.32214 2.48149 28 a x - - + 1.38740 1.38740 1.38298 1.38740 + 0.03441 4.05976 4.10001 1.41198 0.27926 1.09780 0.40587 + 14 0.29412 2.55413 2.49679 2.35701 30 a x - - + 1.38194 1.39067 1.38194 1.39067 + 0.03505 4.02482 4.10005 1.39522 0.28473 1.09929 0.40512 + 15 0.18837 2.99710 2.82270 2.77556 33 a x - - + 1.39015 1.39472 1.37503 1.38539 + 0.03725 3.97815 4.02618 1.37955 0.28994 1.10102 0.40426 + 16 0.50816 2.05151 2.22111 1.82407 37 a x - - + 1.36727 1.38730 1.39683 1.39405 + 0.04830 3.89881 3.61610 1.29026 0.32186 1.05306 0.42905 + 17 2.11260 2.73141 0.29747 2.64152 41 g x - - + 1.36913 1.40376 1.40376 1.36913 + 0.03705 3.93681 4.08299 1.44872 0.26771 1.07479 0.41759 + 18 2.24459 1.90539 2.34054 0.43234 46 t x - - + 1.33632 1.42493 1.39937 1.38665 + 0.04427 3.64574 4.06297 1.70501 0.20061 1.21309 0.35279 + 19 0.44322 2.17202 2.18055 2.03175 57 a x - - + 1.41047 1.41471 1.36338 1.35797 + 0.03970 3.81957 4.07540 1.65588 0.21186 1.22788 0.34660 + 20 0.33340 2.42691 2.40824 2.25160 66 a x - - + 1.29389 1.44615 1.37917 1.43324 + 0.04223 3.70146 4.09459 1.55158 0.23815 1.05880 0.42598 + 21 2.50563 1.98543 2.69601 0.33746 74 t x - - + 1.39462 1.39462 1.42862 1.32990 + 0.04184 3.80216 3.98177 1.80466 0.17976 1.00279 0.45705 + 22 2.54484 1.97505 2.66483 0.33806 84 t x - - + 1.39134 1.39489 1.38662 1.37246 + 0.03877 3.97504 3.95038 1.37620 0.29107 1.13932 0.38572 + 23 2.10159 2.83856 0.29282 2.61635 88 g x - - + 1.39682 1.39682 1.35536 1.39682 + 0.05046 3.75402 3.65808 1.08330 0.41321 1.13019 0.39004 + 24 2.25298 0.61854 2.50691 
1.29221 90 c x - - + 1.35803 1.49605 1.46737 1.24379 + 0.06091 3.28322 3.83564 1.89752 0.16245 1.28788 0.32276 + 25 1.27819 2.23285 0.76242 1.91259 106 g x - - + 1.29024 1.67349 1.68279 1.04597 + 0.05752 3.44263 3.73311 2.58671 0.07825 1.26818 0.33037 + 26 1.86925 2.58352 0.39466 2.33986 131 g x - - + 1.31084 1.49412 1.46666 1.29002 + 0.04698 3.54257 4.07715 2.25245 0.11109 0.86163 0.54900 + 27 2.38297 1.93394 2.39162 0.39800 151 t x - - + 1.33582 1.47359 1.44163 1.30411 + 0.04951 3.48445 4.03783 2.15951 0.12260 1.21681 0.35122 + 28 2.41717 2.17810 2.62774 0.32113 170 t x - - + 1.36805 1.48060 1.37439 1.32840 + 0.04849 3.50958 4.05014 2.58370 0.07850 1.22399 0.34822 + 29 2.57764 2.35132 2.56552 0.28512 194 t x - - + 1.43829 1.43458 1.24787 1.43829 + 0.04667 3.56670 4.05428 2.49706 0.08591 1.23744 0.34267 + 30 2.47248 2.07688 2.62257 0.33172 215 t x - - + 1.25120 1.52623 1.70635 1.15531 + 0.08932 3.31524 3.01336 2.81842 0.06156 1.22909 0.34610 + 31 2.25937 2.13157 2.02027 0.43957 248 t x - - + 1.18172 1.43522 1.72841 1.28150 + 0.07936 2.93117 3.77395 2.46269 0.08906 0.60457 0.79034 + 32 2.04508 2.84981 0.30490 2.58263 280 g x - - + 1.17665 1.66785 1.66218 1.16056 + 0.05998 3.23615 3.96853 2.83684 0.06040 1.01952 0.44749 + 33 2.45103 0.38098 2.56776 1.87147 317 c x - - + 1.24153 1.52524 1.60663 1.22783 + 0.05538 3.39046 3.90294 2.73920 0.06680 1.18729 0.36391 + 34 2.22082 0.36258 2.75077 2.02704 347 c x - - + 1.15008 1.62014 1.86511 1.10673 + 0.06086 3.18178 4.04341 2.94504 0.05403 1.25991 0.33363 + 35 0.27033 2.66664 2.52541 2.43767 388 a x - - + 1.24951 1.47565 1.41392 1.42074 + 0.07123 3.00373 3.95552 3.13655 0.04440 1.28173 0.32512 + 36 2.83107 2.41670 2.97197 0.22235 439 t x - - + 1.37071 1.57683 1.38637 1.23972 + 0.05293 3.45216 3.91807 2.54402 0.08181 1.14651 0.38235 + 37 2.52322 2.25084 2.45909 0.31611 465 t x - - + 1.26335 1.55077 1.59008 1.19965 + 0.07504 3.13329 3.55006 3.08962 0.04659 1.13108 0.38962 + 38 0.45807 2.30687 1.98940 2.03143 512 a x - - + 
1.15472 1.67511 1.53797 1.26320 + 0.09820 3.13076 2.99876 2.79197 0.06326 1.39915 0.28343 + 39 2.37471 0.42180 2.44763 1.80427 550 c x - - + 1.23785 1.49058 1.48364 1.35502 + 0.06081 3.19472 4.01643 2.41851 0.09327 0.94671 0.49105 + 40 2.32826 1.95481 2.36781 0.40458 578 t x - - + 1.36586 1.46001 1.43000 1.29720 + 0.05257 3.39673 4.03256 1.84862 0.17133 1.40979 0.27997 + 41 2.68669 2.13935 2.81520 0.28200 592 t x - - + 1.34965 1.42793 1.45781 1.31633 + 0.04735 3.57826 3.99988 2.09424 0.13144 1.22129 0.34934 + 42 2.55904 2.16444 2.70859 0.29952 609 t x - - + 1.12072 1.61936 1.63578 1.26895 + 0.07346 3.25910 3.42962 2.85641 0.05919 1.38363 0.28857 + 43 1.99923 1.61027 2.26343 0.57851 646 t x - - + 1.32290 1.58747 1.61095 1.11018 + 0.06656 3.08568 3.97944 2.44774 0.09046 0.75593 0.63407 + 44 0.23887 2.79899 2.55209 2.60783 675 a x - - + 1.18557 1.50323 1.59070 1.31590 + 0.05597 3.38637 3.88222 2.46900 0.08847 1.27945 0.32599 + 45 0.29593 2.53488 2.53903 2.32335 701 a x - - + 1.08710 1.54222 1.59276 1.40430 + 0.07539 2.94521 3.91062 1.91623 0.15918 1.22327 0.34852 + 46 2.58352 2.40524 2.76700 0.25955 725 t x - - + 1.19685 1.58503 1.74852 1.14293 + 0.06124 3.18279 4.02089 2.82961 0.06085 1.05474 0.42814 + 47 2.13251 2.88788 0.29508 2.50964 764 g x - - + 1.20891 1.55463 1.68206 1.19000 + 0.06526 3.12574 3.94910 2.41448 0.09367 1.10396 0.40280 + 48 2.23841 2.99164 0.25118 2.72900 792 g x - - + 1.26330 1.55339 1.52606 1.24355 + 0.05464 3.34968 4.01313 2.78872 0.06347 1.15133 0.38012 + 49 2.57533 0.32900 2.64632 2.01501 824 c x - - + 1.35118 1.39828 1.40141 1.39516 + 0.04340 3.79297 3.91506 1.59549 0.22666 1.20075 0.35806 + 50 0.46433 2.04127 2.23437 2.00605 833 a x - - + 1.23062 1.36903 1.62282 1.36182 + 0.05764 3.31530 3.92762 2.28791 0.10700 1.07910 0.41536 + 51 0.27513 2.77017 2.28518 2.57549 853 a x - - + 1.27958 1.58726 1.46109 1.25394 + 0.05750 3.30072 3.96214 2.60776 0.07656 1.25708 0.33475 + 52 0.20149 2.86434 2.84551 2.69770 883 a x - - + 1.23645 1.62259 1.71174 
1.10368 + 0.05756 3.26729 4.02702 2.54508 0.08172 1.27391 0.32814 + 53 0.26982 2.65833 2.50477 2.46835 911 a x - - + 1.36005 1.50358 1.48100 1.22550 + 0.06921 3.37553 3.42118 2.36646 0.09851 1.27560 0.32748 + 54 0.40022 2.19284 2.22687 2.20396 934 a x - - + 1.12070 1.60472 1.53213 1.35895 + 0.05523 3.36752 3.94966 2.42917 0.09224 0.84774 0.55928 + 55 2.11356 0.46400 2.46442 1.79955 960 c x - - + 1.23932 1.35913 1.50478 1.46331 + 0.05187 3.47055 3.94022 2.35854 0.09933 1.12102 0.39445 + 56 1.85868 0.79440 2.22069 1.25971 983 c x - - + 1.21951 1.50212 1.51138 1.34185 + 0.06404 3.29054 3.69705 1.75742 0.18933 1.18410 0.36532 + 57 1.33272 2.32720 0.71452 1.90215 999 g x - - + 1.12229 1.49343 1.56653 1.42255 + 0.04920 3.46654 4.08749 2.17995 0.11996 1.31769 0.31164 + 58 2.48337 0.43652 2.46331 1.68683 1017 c x - - + 1.34704 1.55461 1.38112 1.28222 + 0.04823 3.61532 3.90311 2.20911 0.11631 1.00864 0.45368 + 59 0.41659 2.44509 1.93972 2.20507 1034 a x - - + 1.38198 1.38198 1.39194 1.38932 + 0.03641 3.98130 4.06929 1.35873 0.29704 1.31330 0.31325 + 60 0.41612 2.39160 1.97116 2.21075 1037 a x - - + 1.03649 1.46430 1.57421 1.57557 + 0.04769 3.52580 4.06641 2.32461 0.10294 0.84329 0.56263 + 61 2.66264 2.12302 2.82746 0.28581 1056 t x - - + 1.36925 1.39635 1.38930 1.39048 + 0.04097 3.97400 3.84718 1.39433 0.28502 1.12205 0.39395 + 62 2.26510 2.13196 2.42551 0.37231 1060 t x - - + 1.37965 1.39147 1.39147 1.38264 + 0.04082 3.91610 3.90805 1.24613 0.33914 0.95192 0.48776 + 63 0.41244 2.25761 2.16787 2.12907 1062 a x - - + 1.34515 1.41203 1.41203 1.37753 + 0.04054 3.77835 4.08203 1.30483 0.31638 1.11819 0.39582 + 64 2.51464 0.37905 2.62296 1.82008 1068 c x - - + 1.39543 1.38753 1.39233 1.37008 + 0.03854 3.90584 4.03535 1.36573 0.29463 1.13682 0.38689 + 65 2.16380 2.11332 2.18714 0.42765 1073 t x - - + 1.38764 1.38471 1.38519 1.38764 + 0.03575 4.05376 4.03073 1.40080 0.28289 1.03825 0.43707 + 66 2.79349 2.39141 2.87271 0.23478 1075 t x - - + 1.37227 1.39101 1.39101 1.39101 + 
0.03597 4.01447 4.05827 1.39017 0.28639 1.06429 0.42308 + 67 2.82488 2.47749 2.93179 0.21887 1078 t x - - + 1.38141 1.39112 1.38915 1.38353 + 0.03661 3.99477 4.04370 1.35958 0.29675 1.13439 0.38804 + 68 2.77679 2.30433 2.90694 0.24425 1081 t x - - + 1.37593 1.38989 1.45520 1.32825 + 0.04447 3.68736 3.99242 1.76176 0.18843 0.98580 0.46703 + 69 2.47698 3.17398 0.19595 2.95437 1093 g x - - + 1.38264 1.38264 1.39734 1.38264 + 0.05358 3.96553 3.40487 1.40348 0.28202 1.03112 0.44100 + 70 2.84327 0.27906 2.97336 2.00890 1097 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03412 4.08811 4.08811 1.46634 0.26236 0.69006 0.69625 + 71 0.21870 2.83638 2.69251 2.65798 1098 a x - - + 1.37446 1.37942 1.39640 1.39509 + 0.03670 3.93983 4.09935 1.41905 0.27700 1.10002 0.40476 + 72 2.35233 0.46085 2.23804 1.78715 1103 c x - - + 1.38536 1.38781 1.38781 1.38421 + 0.03493 4.03822 4.09272 1.39310 0.28542 1.09638 0.40658 + 73 2.57111 0.32543 2.74124 1.98892 1105 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03381 4.09688 4.09688 1.46634 0.26236 1.09626 0.40664 + 74 0.27014 2.61416 2.53262 2.47636 1106 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03461 4.09267 4.05587 1.46634 0.26236 1.09748 0.40603 + 75 0.52873 2.16549 1.91736 1.90409 1107 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03426 4.08396 4.08396 1.46634 0.26236 1.07423 0.41788 + 76 2.33134 0.38082 2.65861 1.90055 1108 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03466 4.07266 4.07266 1.46634 0.26236 1.09861 0.40547 + 77 2.20588 0.45134 2.35553 1.84373 1109 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03550 4.04912 4.04912 1.46634 0.26236 1.09861 0.40547 + 78 2.69018 2.22054 2.82311 0.26898 1110 t x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03711 4.00561 4.00561 1.46634 0.26236 1.09861 0.40547 + 79 0.16248 3.15867 2.86159 2.98963 1111 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.04048 3.92018 3.92018 1.46634 0.26236 1.09861 0.40547 + 80 0.17484 3.04770 2.86638 2.88183 1112 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.02045 
3.90014 * 1.46634 0.26236 0.00000 * +// diff --git a/q2_types/hmmer/tests/data/hmms/rna.hmm b/q2_types/hmmer/tests/data/hmms/rna.hmm new file mode 100644 index 00000000..5dd4359f --- /dev/null +++ b/q2_types/hmmer/tests/data/hmms/rna.hmm @@ -0,0 +1,382 @@ +HMMER3/f [3.1b2 | February 2015] +NAME 5S_rRNA +ACC RF00001 +DESC 5S ribosomal RNA +LENG 119 +MAXL 305 +ALPH RNA +RF yes +MM no +CONS yes +CS yes +MAP yes +DATE Sun Feb 19 15:28:58 2017 +NSEQ 712 +EFFN 9.307953 +CKSUM 242171328 +STATS LOCAL MSV -9.2495 0.71806 +STATS LOCAL VITERBI -10.6521 0.71806 +STATS LOCAL FORWARD -3.9747 0.71806 +HMM A C G U + m->m m->i m->d i->m i->i d->m d->d + COMPO 1.44549 1.37725 1.25977 1.47676 + 1.43153 1.34302 1.34302 1.43153 + 0.07274 4.09044 2.92951 1.07081 0.41966 0.00000 * + 1 1.54335 1.70220 1.04746 1.37348 3 g g - ( + 1.38629 1.38629 1.38629 1.38629 + 0.02796 4.69986 3.99134 1.46634 0.26236 0.98356 0.46837 + 2 1.69437 1.14964 1.46606 1.31416 4 c c - ( + 1.38629 1.38629 1.38629 1.38629 + 0.01877 4.70906 4.64822 1.46634 0.26236 1.12704 0.39155 + 3 1.25252 1.38627 1.88263 1.16469 5 u c - ( + 1.38629 1.38629 1.38629 1.38629 + 0.01965 4.72024 4.55191 1.46634 0.26236 0.80895 0.58937 + 4 1.72700 2.07424 1.11856 0.99484 6 u u - ( + 1.38629 1.38629 1.38629 1.38629 + 0.01870 4.73297 4.63281 1.46634 0.26236 0.66621 0.72083 + 5 1.54450 2.37555 0.52987 2.25440 7 g G - ( + 1.38629 1.38629 1.38629 1.38629 + 0.02087 4.74329 4.42737 1.46634 0.26236 1.07337 0.41833 + 6 2.71222 0.55655 2.16572 1.40340 8 c c - ( + 1.46123 1.34104 1.46123 1.29271 + 0.04754 3.32700 4.55416 0.43093 1.04954 1.06781 0.42123 + 7 1.88183 3.18268 0.27858 3.00864 10 g g - ( + 1.38629 1.38629 1.38629 1.38629 + 0.02914 4.74112 3.91233 1.46634 0.26236 1.10199 0.40378 + 8 1.21294 1.87108 0.91371 1.91267 11 g g - ( + 1.42544 1.35183 1.34543 1.42544 + 0.03222 3.77698 4.73128 0.64731 0.74118 1.45006 0.26730 + 9 2.92783 0.67317 2.82760 0.97486 13 c c - ( + 1.39851 1.35052 1.39851 1.39851 + 0.02477 4.33224 4.48084 1.04107 0.43553 
1.51536 0.24811 + 10 2.18357 0.54013 1.84964 1.91465 15 c C - , + 1.37342 1.39919 1.39919 1.37371 + 0.02250 4.31140 4.72954 1.02501 0.44440 1.36766 0.29397 + 11 0.38365 2.42629 2.19201 2.13222 17 a A - , + 1.38629 1.38629 1.38629 1.38629 + 0.04294 4.73158 3.40456 1.46634 0.26236 0.62352 0.76799 + 12 2.20731 2.05846 2.30257 0.41196 18 u U - , + 1.39935 1.39935 1.34812 1.39935 + 0.02367 4.29767 4.62666 1.02132 0.44647 1.64817 0.21369 + 13 0.46807 2.44693 1.91775 1.96401 20 a A - , + 1.38629 1.38629 1.38629 1.38629 + 0.01893 4.72039 4.62082 1.46634 0.26236 0.41681 1.07631 + 14 2.55473 0.83543 1.03807 2.00646 21 c c - < + 1.38629 1.38629 1.38629 1.38629 + 0.01755 4.74471 4.74471 1.46634 0.26236 1.13291 0.38875 + 15 2.39320 0.46353 2.32167 1.70653 22 c c - < + 1.45283 1.09532 1.57794 1.49050 + 0.05776 3.04853 4.74471 0.87955 0.53609 1.03335 0.43977 + 16 0.99836 2.22160 1.07887 1.69771 27 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 17 1.65807 1.64143 0.81511 1.75330 28 g g - < + 1.32750 1.40669 1.40669 1.40669 + 0.02485 4.14453 4.74562 0.87729 0.53770 1.09861 0.40547 + 18 1.74878 1.07763 1.33154 1.50715 30 c c - < + 1.36832 1.40719 1.40719 1.36333 + 0.02503 4.13325 4.74562 0.86889 0.54372 1.09861 0.40547 + 19 1.11114 1.72950 1.24664 1.58000 32 a g - < + 1.36453 1.37249 1.40441 1.40441 + 0.02402 4.19685 4.74562 0.91728 0.51017 1.09861 0.40547 + 20 1.75962 1.21469 1.33724 1.31484 34 c c - < + 1.34930 1.39894 1.39894 1.39894 + 0.02204 4.33423 4.74562 1.03089 0.44113 1.09861 0.40547 + 21 2.10367 2.26221 0.69525 1.29127 36 g g - < + 1.23194 1.49181 1.35346 1.49181 + 0.09167 3.83584 2.71797 1.21903 0.35029 1.09861 0.40547 + 22 1.08122 2.07379 0.96621 1.86701 40 g a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01861 4.68660 4.68660 1.46634 0.26236 0.30116 1.34692 + 23 0.65069 1.92638 2.07928 1.57205 41 a A - - + 1.25691 1.56831 1.56831 1.20840 + 0.05734 3.06802 4.68691 2.01181 0.14358 0.97567 0.47311 + 24 0.77013 1.57188 
2.38564 1.43817 52 a a - - + 1.40004 1.40004 1.34615 1.40004 + 0.02963 4.30295 4.15605 1.00560 0.45543 0.96359 0.48049 + 25 1.08270 1.81905 0.98134 2.08480 54 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01935 4.73865 4.56477 1.46634 0.26236 0.78701 0.60734 + 26 2.77791 0.32597 3.23107 1.73450 55 c c - < + 1.37317 1.36923 1.40162 1.40162 + 0.02306 4.26226 4.74336 0.97163 0.47557 1.03813 0.43713 + 27 0.59716 2.50150 1.63951 1.75105 57 a A - - + 1.68144 1.28909 1.43347 1.20433 + 0.06658 3.11247 3.91592 2.07020 0.13486 1.13318 0.38862 + 28 3.45772 0.15214 3.28331 2.62934 68 c C - - + 1.24691 1.52452 1.28843 1.51786 + 0.06417 3.05117 4.20964 0.68732 0.69901 0.60688 0.78756 + 29 2.54112 0.43659 2.14754 1.84379 71 c c - < + 1.38629 1.38629 1.38629 1.38629 + 0.02064 4.73962 4.44910 1.46634 0.26236 1.17883 0.36765 + 30 1.75760 1.28076 1.06455 1.58555 72 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01767 4.73785 4.73785 1.46634 0.26236 0.83716 0.56729 + 31 2.45757 2.10516 0.32641 2.64477 73 g G - < + 1.38629 1.61608 1.38629 1.19957 + 0.04126 3.46755 4.68520 1.40188 0.28254 1.16513 0.37379 + 32 0.68847 2.43846 1.70166 1.47850 78 a a - < + 1.39953 1.34760 1.39953 1.39953 + 0.04835 4.31604 3.38601 1.01709 0.44886 1.18379 0.36545 + 33 2.02827 2.44383 2.92174 0.31777 80 u u - < + 1.38629 1.38629 1.38629 1.38629 + 0.01803 4.71785 4.71785 1.46634 0.26236 1.72111 0.19707 + 34 3.56015 0.19950 3.88725 2.02556 81 c C - < + 1.35718 1.41983 1.23310 1.56316 + 0.05788 3.17887 4.22665 1.23421 0.34399 1.80037 0.18061 + 35 2.77743 0.33393 2.97305 1.76872 86 c C - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01812 4.71305 4.71305 1.46634 0.26236 1.40817 0.28050 + 36 3.98420 0.10391 3.40107 3.06287 87 C C - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01801 4.71875 4.71875 1.46634 0.26236 1.80037 0.18061 + 37 0.60764 2.71262 1.22750 2.34366 88 a A - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01801 4.71875 4.71875 1.46634 0.26236 1.80037 0.18061 + 38 3.23088 3.18734 3.87694 0.10705 89 u U - _ + 1.38629 
1.38629 1.38629 1.38629 + 0.03159 4.71875 3.80896 1.46634 0.26236 1.74175 0.19263 + 39 2.16110 0.61914 2.26925 1.41468 90 c C - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01825 4.70596 4.70596 1.46634 0.26236 1.93795 0.15548 + 40 1.23660 0.86177 2.56859 1.55791 91 c c - _ + 1.77144 0.52247 2.28261 2.00365 + 0.48567 0.98086 4.63299 0.38856 1.13330 1.09154 0.40902 + 41 2.95007 4.22451 0.08802 4.05756 99 G G - > + 1.38629 1.38629 1.38629 1.38629 + 0.01855 4.71928 4.66066 1.46634 0.26236 1.79035 0.18260 + 42 0.19851 2.48110 3.36603 2.78270 100 a a - > + 1.40690 1.32690 1.40690 1.40690 + 0.02561 4.11282 4.71875 0.87369 0.54027 0.56674 0.83787 + 43 0.56275 2.33321 2.68997 1.32616 102 a A - - + 1.38629 1.38629 1.38629 1.38629 + 0.01763 4.74024 4.74024 1.46634 0.26236 1.28599 0.32349 + 44 3.39375 0.12438 4.17735 2.68773 103 c C - - + 1.38629 1.38629 1.38629 1.38629 + 0.01763 4.74024 4.74024 1.46634 0.26236 0.98476 0.46765 + 45 1.56019 1.81908 2.57774 0.59460 104 u u - > + 1.38629 1.38629 1.38629 1.38629 + 0.01757 4.74330 4.74330 1.46634 0.26236 1.12516 0.39245 + 46 2.56361 0.38721 1.86761 2.41311 105 c C - > + 1.38629 1.38629 1.38629 1.38629 + 0.01756 4.74383 4.74383 1.46634 0.26236 1.16513 0.37379 + 47 1.65610 0.99848 1.23540 1.89738 106 c c - > + 1.39034 1.39806 1.42179 1.33693 + 0.03045 3.84973 4.74383 0.68205 0.70437 0.97754 0.47198 + 48 2.00597 3.06134 0.26182 3.01610 108 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.02119 4.74562 4.39977 1.46634 0.26236 1.09861 0.40547 + 49 0.58998 1.58170 2.46808 1.86247 109 a A - - + 1.46483 1.46483 1.42461 1.21347 + 0.04703 3.29086 4.74203 0.41677 1.07638 0.88179 0.53450 + 50 0.30089 2.06397 2.82571 2.60877 111 a A - - + 1.38629 1.38629 1.38629 1.38629 + 0.01853 4.74562 4.63915 1.46634 0.26236 1.09861 0.40547 + 51 2.65105 3.53770 0.14206 3.41803 112 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01755 4.74465 4.74465 1.46634 0.26236 1.13538 0.38758 + 52 2.25374 1.86995 2.96108 0.37237 113 u u - > + 1.38629 1.38629 1.38629 1.38629 + 
0.02119 4.74465 4.40076 1.46634 0.26236 1.13538 0.38758 + 53 2.30451 2.06868 1.68368 0.53077 114 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.01761 4.74107 4.74107 1.46634 0.26236 1.15277 0.37945 + 54 0.07177 3.47955 4.08658 3.83347 115 A A - - + 1.38629 1.38629 1.38629 1.38629 + 0.01760 4.74205 4.74205 1.46634 0.26236 1.22683 0.34703 + 55 0.07030 3.55771 4.11216 3.77171 116 A A - - + 1.35190 1.39803 1.39803 1.39803 + 0.02925 4.35534 4.13613 1.05273 0.42922 1.22683 0.34703 + 56 1.71014 3.01237 0.32835 2.99827 118 g G - - + 1.85597 1.60694 0.90297 1.43618 + 0.08251 2.66512 4.64522 2.63460 0.07445 0.63738 0.75221 + 57 2.17286 0.32500 2.86949 2.23591 136 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.01855 4.74481 4.63738 1.46634 0.26236 1.03982 0.43621 + 58 1.54608 1.38572 1.03143 1.71325 137 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01755 4.74464 4.74464 1.46634 0.26236 1.02829 0.44257 + 59 1.73179 1.41448 1.67883 0.93296 138 u c - > + 1.38629 1.38629 1.38629 1.38629 + 0.01820 4.74562 4.67277 1.46634 0.26236 1.09861 0.40547 + 60 1.88051 1.48253 0.89529 1.55148 139 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01802 4.74497 4.69262 1.46634 0.26236 1.05064 0.43034 + 61 1.83989 0.89343 1.57228 1.49458 140 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.02062 4.74516 4.44689 1.46634 0.26236 1.06402 0.42322 + 62 1.95525 1.32639 2.27770 0.71229 141 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.01759 4.74260 4.74260 1.46634 0.26236 0.90996 0.51507 + 63 1.17195 1.68298 2.18999 0.93522 142 u u - - + 1.42850 1.37453 1.30763 1.44006 + 0.04330 3.39069 4.74562 0.45444 1.00732 1.09861 0.40547 + 64 2.06137 2.80967 0.29508 2.68794 144 g g - > + 1.39830 1.35112 1.39830 1.39830 + 0.02181 4.35145 4.74562 1.04608 0.43281 1.09861 0.40547 + 65 2.09444 1.16363 0.78407 2.22593 146 g g - > + 1.36526 1.42987 1.32423 1.42987 + 0.02827 3.99693 4.65608 0.93847 0.49631 1.09861 0.40547 + 66 2.69864 3.10988 0.17720 2.98611 149 g g - < + 1.44804 1.49525 1.55952 1.10703 + 0.04988 3.21982 4.74481 
0.88506 0.53220 1.03982 0.43621 + 67 2.75316 0.33473 2.45264 2.00485 153 c C - < + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 68 2.10133 1.01237 1.16670 1.59476 154 c c - < + 1.35433 1.34384 1.40173 1.44874 + 0.03127 3.81223 4.74562 0.88206 0.53432 1.09861 0.40547 + 69 1.06718 1.90576 1.25097 1.50920 157 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 70 1.17832 2.48536 0.67247 2.31807 158 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 71 1.12397 2.13330 1.34852 1.21421 159 a g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 72 1.92362 1.97652 0.76090 1.39381 160 g g - - + 1.38629 1.38629 1.38629 1.38629 + 0.19341 4.74562 1.78878 1.46634 0.26236 1.09861 0.40547 + 73 1.49538 1.94718 2.24044 0.64103 161 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.19486 4.57308 1.79140 1.46634 0.26236 3.05575 0.04823 + 74 0.45457 2.40448 2.17182 1.82635 162 a A - - + 1.25913 1.51334 1.51334 1.28825 + 0.08684 2.64599 4.40301 0.39629 1.11722 0.32782 1.27474 + 75 2.93299 3.01855 0.13952 3.57151 165 g G - - + 1.38629 1.38629 1.38629 1.38629 + 0.01888 4.67199 4.67199 1.46634 0.26236 0.20256 1.69628 + 76 3.99358 2.92642 4.19781 0.09107 166 U U - < + 1.38629 1.38629 1.38629 1.38629 + 0.01754 4.74521 4.74521 1.46634 0.26236 1.11450 0.39761 + 77 0.11617 3.11470 3.66144 3.22913 167 a A - - + 1.38629 1.38629 1.38629 1.38629 + 0.03608 4.74521 3.62152 1.46634 0.26236 1.11450 0.39761 + 78 2.84125 0.35510 1.85871 2.46899 168 c c - < + 1.38629 1.38629 1.38629 1.38629 + 0.01843 4.72699 4.66629 1.46634 0.26236 1.63398 0.21710 + 79 3.31819 2.37733 3.05633 0.19368 169 u u - < + 1.38629 1.38629 1.38629 1.38629 + 0.02382 4.72644 4.22112 1.46634 0.26236 1.34190 0.30293 + 80 1.03409 2.18770 1.15649 1.52470 170 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01792 4.72376 4.72376 1.46634 0.26236 1.06120 
0.42471 + 81 1.86872 1.58634 0.86303 1.51811 171 g g - - + 1.38629 1.38629 1.38629 1.38629 + 0.02599 4.73172 4.08388 1.46634 0.26236 1.33266 0.30622 + 82 1.78916 2.52942 0.37772 2.69164 172 g G - - + 1.38629 1.38629 1.38629 1.38629 + 0.14347 4.72552 2.08118 1.46634 0.26236 1.35133 0.29961 + 83 0.89061 2.00611 1.13404 2.01474 173 a a - - + 1.34036 1.40209 1.40209 1.40209 + 0.16534 4.10702 1.99558 1.13868 0.38602 1.47891 0.25862 + 84 2.50495 2.45089 2.96402 0.24783 176 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.04231 4.50624 3.49387 1.46634 0.26236 0.12430 2.14660 + 85 1.96114 2.03452 0.46600 2.29211 177 g G - < + 1.38629 1.38629 1.38629 1.38629 + 0.01800 4.71951 4.71951 1.46634 0.26236 1.78612 0.18345 + 86 1.96207 1.76997 0.59462 1.98542 178 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.06564 4.71951 2.90743 1.46634 0.26236 1.68897 0.20421 + 87 2.48356 3.14584 0.23004 2.53801 179 g G - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01885 4.67369 4.67369 1.46634 0.26236 1.45718 0.26513 + 88 1.55291 1.80348 1.69102 0.82254 180 u u - _ + 1.38629 1.38629 1.38629 1.38629 + 0.02185 4.68814 4.38920 1.46634 0.26236 0.91915 0.50892 + 89 1.98709 2.38814 0.43269 2.10089 181 g G - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01940 4.71010 4.58486 1.46634 0.26236 1.66421 0.20991 + 90 0.48856 2.23554 2.05100 1.89081 182 a A - _ + 1.38629 1.38629 1.38629 1.38629 + 0.04725 4.71185 3.29255 1.46634 0.26236 0.71674 0.67010 + 91 2.08360 0.69276 1.61378 1.73617 183 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.02481 4.20562 4.64669 0.95696 0.48460 1.07845 0.41570 + 92 2.58338 0.49017 1.71456 2.02558 185 c C - > + 1.41519 1.37531 1.38981 1.36557 + 0.05835 3.94245 3.28929 0.75530 0.63463 1.29897 0.31857 + 93 1.44648 1.55255 1.47019 1.13006 187 u u - - + 1.38629 1.38629 1.38629 1.38629 + 0.02238 4.69570 4.34314 1.46634 0.26236 0.91028 0.51486 + 94 2.44101 0.55901 2.21621 1.46046 188 c C - - + 1.38629 1.38629 1.38629 1.38629 + 0.01922 4.71416 4.59873 1.46634 0.26236 1.56388 0.23486 + 95 1.61020 
0.94090 1.43457 1.76225 189 c c - - + 1.38629 1.38629 1.38629 1.38629 + 0.03993 4.71641 3.50002 1.46634 0.26236 1.58461 0.22945 + 96 2.18875 1.48910 2.33334 0.57023 190 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.03318 4.69762 3.74983 1.46634 0.26236 1.94079 0.15501 + 97 2.22832 2.76588 0.27337 2.68011 191 g G - - + 1.39934 1.39934 1.39934 1.34815 + 0.03844 4.26291 3.74534 1.02156 0.44634 0.26192 1.46781 + 98 2.23154 2.13562 0.42666 2.10555 193 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01781 4.72977 4.72977 1.46634 0.26236 1.57070 0.23307 + 99 2.25826 3.31778 0.19553 3.30137 194 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01781 4.72977 4.72977 1.46634 0.26236 0.55476 0.85381 + 100 0.12870 3.11387 3.74780 2.94195 195 a A - - + 1.49330 1.35303 1.35303 1.35303 + 0.03095 3.82658 4.74521 1.21784 0.35079 1.11450 0.39761 + 101 0.52168 2.77619 1.31025 2.59762 199 a A - > + 1.42051 1.37646 1.33049 1.42051 + 0.50426 3.27205 1.02688 0.50867 0.91954 1.11450 0.39761 + 102 1.75707 1.65310 1.00247 1.31295 202 g g - - + 1.38629 1.38629 1.38629 1.38629 + 0.02709 4.31530 4.31530 1.46634 0.26236 0.06230 2.80676 + 103 0.98530 1.54075 2.13641 1.22290 203 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01763 4.74029 4.74029 1.46634 0.26236 0.80681 0.59109 + 104 1.91716 0.92422 1.32032 1.66554 204 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 105 2.25576 0.81533 2.15099 1.08962 205 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.03272 4.74562 3.75059 1.46634 0.26236 1.09861 0.40547 + 106 1.07692 1.79642 1.75375 1.13836 206 a a - - + 1.47073 1.41139 1.36911 1.30156 + 0.05759 3.16375 4.29051 0.98783 0.46583 0.55355 0.85544 + 107 1.77856 1.36913 0.90724 1.75358 211 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01762 4.74081 4.74081 1.46634 0.26236 1.26779 0.33052 + 108 2.13110 2.71915 0.28126 2.80460 212 g G - > + 1.37506 1.46572 1.39000 1.31980 + 0.04786 3.28070 4.69626 0.41342 1.08289 1.26779 0.33052 + 109 1.62498 2.09339 1.94510 
0.62205 214 u u - > + 1.67585 0.77934 1.88169 1.60043 + 0.48638 0.97901 4.65939 0.06094 2.82815 0.84034 0.56486 + 110 1.68495 3.43968 0.29793 3.21597 217 g g - ) + 1.40521 1.33160 1.40521 1.40521 + 0.03979 4.17697 3.74368 0.90276 0.51995 1.14102 0.38492 + 111 2.20670 1.02757 2.06911 0.90199 219 u c - ) + 1.38629 1.38629 1.38629 1.38629 + 0.03044 4.72941 3.85623 1.46634 0.26236 0.89025 0.52857 + 112 2.56171 1.14811 2.54620 0.64015 220 u c - ) + 1.22072 1.44819 1.44819 1.44819 + 0.03025 3.90293 4.64488 1.01008 0.45286 1.57949 0.23077 + 113 2.42825 3.31342 0.17401 3.34863 223 g g - ) + 1.37346 1.36856 1.40182 1.40182 + 0.02545 4.23975 4.53557 0.96752 0.47807 0.78017 0.61309 + 114 2.28462 0.62468 2.31949 1.33017 225 c C - ) + 1.38629 1.38629 1.38629 1.38629 + 0.01814 4.73640 4.68744 1.46634 0.26236 1.26908 0.33002 + 115 0.95503 1.17607 1.96286 1.79422 226 a a - ) + 1.38629 1.38629 1.38629 1.38629 + 0.02255 4.73719 4.30270 1.46634 0.26236 1.31072 0.31420 + 116 1.24426 1.94348 1.35080 1.17245 227 u g - ) + 1.38629 1.38629 1.38629 1.38629 + 0.03289 4.73302 3.74836 1.46634 0.26236 1.49088 0.25512 + 117 1.61067 1.83596 1.13930 1.13712 228 u g - ) + 1.38629 1.38629 1.38629 1.38629 + 0.03308 4.71816 3.74632 1.46634 0.26236 1.68263 0.20565 + 118 1.74114 0.93694 1.54031 1.52079 229 c c - ) + 1.38629 1.38629 1.38629 1.38629 + 0.10437 4.70462 2.40738 1.46634 0.26236 2.03123 0.14061 + 119 1.80143 0.85215 2.05176 1.27320 230 c c - : + 1.38629 1.38629 1.38629 1.38629 + 0.01000 4.61025 * 1.46634 0.26236 0.00000 * +// \ No newline at end of file diff --git a/q2_types/hmmer/tests/data/hmms/rna_dna.hmm b/q2_types/hmmer/tests/data/hmms/rna_dna.hmm new file mode 100644 index 00000000..9a1a116a --- /dev/null +++ b/q2_types/hmmer/tests/data/hmms/rna_dna.hmm @@ -0,0 +1,647 @@ +HMMER3/f [3.1b2 | February 2015] +NAME 5S_rRNA +ACC RF00001 +DESC 5S ribosomal RNA +LENG 119 +MAXL 305 +ALPH RNA +RF yes +MM no +CONS yes +CS yes +MAP yes +DATE Sun Feb 19 15:28:58 2017 +NSEQ 712 +EFFN 9.307953 +CKSUM 
242171328 +STATS LOCAL MSV -9.2495 0.71806 +STATS LOCAL VITERBI -10.6521 0.71806 +STATS LOCAL FORWARD -3.9747 0.71806 +HMM A C G U + m->m m->i m->d i->m i->i d->m d->d + COMPO 1.44549 1.37725 1.25977 1.47676 + 1.43153 1.34302 1.34302 1.43153 + 0.07274 4.09044 2.92951 1.07081 0.41966 0.00000 * + 1 1.54335 1.70220 1.04746 1.37348 3 g g - ( + 1.38629 1.38629 1.38629 1.38629 + 0.02796 4.69986 3.99134 1.46634 0.26236 0.98356 0.46837 + 2 1.69437 1.14964 1.46606 1.31416 4 c c - ( + 1.38629 1.38629 1.38629 1.38629 + 0.01877 4.70906 4.64822 1.46634 0.26236 1.12704 0.39155 + 3 1.25252 1.38627 1.88263 1.16469 5 u c - ( + 1.38629 1.38629 1.38629 1.38629 + 0.01965 4.72024 4.55191 1.46634 0.26236 0.80895 0.58937 + 4 1.72700 2.07424 1.11856 0.99484 6 u u - ( + 1.38629 1.38629 1.38629 1.38629 + 0.01870 4.73297 4.63281 1.46634 0.26236 0.66621 0.72083 + 5 1.54450 2.37555 0.52987 2.25440 7 g G - ( + 1.38629 1.38629 1.38629 1.38629 + 0.02087 4.74329 4.42737 1.46634 0.26236 1.07337 0.41833 + 6 2.71222 0.55655 2.16572 1.40340 8 c c - ( + 1.46123 1.34104 1.46123 1.29271 + 0.04754 3.32700 4.55416 0.43093 1.04954 1.06781 0.42123 + 7 1.88183 3.18268 0.27858 3.00864 10 g g - ( + 1.38629 1.38629 1.38629 1.38629 + 0.02914 4.74112 3.91233 1.46634 0.26236 1.10199 0.40378 + 8 1.21294 1.87108 0.91371 1.91267 11 g g - ( + 1.42544 1.35183 1.34543 1.42544 + 0.03222 3.77698 4.73128 0.64731 0.74118 1.45006 0.26730 + 9 2.92783 0.67317 2.82760 0.97486 13 c c - ( + 1.39851 1.35052 1.39851 1.39851 + 0.02477 4.33224 4.48084 1.04107 0.43553 1.51536 0.24811 + 10 2.18357 0.54013 1.84964 1.91465 15 c C - , + 1.37342 1.39919 1.39919 1.37371 + 0.02250 4.31140 4.72954 1.02501 0.44440 1.36766 0.29397 + 11 0.38365 2.42629 2.19201 2.13222 17 a A - , + 1.38629 1.38629 1.38629 1.38629 + 0.04294 4.73158 3.40456 1.46634 0.26236 0.62352 0.76799 + 12 2.20731 2.05846 2.30257 0.41196 18 u U - , + 1.39935 1.39935 1.34812 1.39935 + 0.02367 4.29767 4.62666 1.02132 0.44647 1.64817 0.21369 + 13 0.46807 2.44693 1.91775 1.96401 20 
a A - , + 1.38629 1.38629 1.38629 1.38629 + 0.01893 4.72039 4.62082 1.46634 0.26236 0.41681 1.07631 + 14 2.55473 0.83543 1.03807 2.00646 21 c c - < + 1.38629 1.38629 1.38629 1.38629 + 0.01755 4.74471 4.74471 1.46634 0.26236 1.13291 0.38875 + 15 2.39320 0.46353 2.32167 1.70653 22 c c - < + 1.45283 1.09532 1.57794 1.49050 + 0.05776 3.04853 4.74471 0.87955 0.53609 1.03335 0.43977 + 16 0.99836 2.22160 1.07887 1.69771 27 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 17 1.65807 1.64143 0.81511 1.75330 28 g g - < + 1.32750 1.40669 1.40669 1.40669 + 0.02485 4.14453 4.74562 0.87729 0.53770 1.09861 0.40547 + 18 1.74878 1.07763 1.33154 1.50715 30 c c - < + 1.36832 1.40719 1.40719 1.36333 + 0.02503 4.13325 4.74562 0.86889 0.54372 1.09861 0.40547 + 19 1.11114 1.72950 1.24664 1.58000 32 a g - < + 1.36453 1.37249 1.40441 1.40441 + 0.02402 4.19685 4.74562 0.91728 0.51017 1.09861 0.40547 + 20 1.75962 1.21469 1.33724 1.31484 34 c c - < + 1.34930 1.39894 1.39894 1.39894 + 0.02204 4.33423 4.74562 1.03089 0.44113 1.09861 0.40547 + 21 2.10367 2.26221 0.69525 1.29127 36 g g - < + 1.23194 1.49181 1.35346 1.49181 + 0.09167 3.83584 2.71797 1.21903 0.35029 1.09861 0.40547 + 22 1.08122 2.07379 0.96621 1.86701 40 g a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01861 4.68660 4.68660 1.46634 0.26236 0.30116 1.34692 + 23 0.65069 1.92638 2.07928 1.57205 41 a A - - + 1.25691 1.56831 1.56831 1.20840 + 0.05734 3.06802 4.68691 2.01181 0.14358 0.97567 0.47311 + 24 0.77013 1.57188 2.38564 1.43817 52 a a - - + 1.40004 1.40004 1.34615 1.40004 + 0.02963 4.30295 4.15605 1.00560 0.45543 0.96359 0.48049 + 25 1.08270 1.81905 0.98134 2.08480 54 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01935 4.73865 4.56477 1.46634 0.26236 0.78701 0.60734 + 26 2.77791 0.32597 3.23107 1.73450 55 c c - < + 1.37317 1.36923 1.40162 1.40162 + 0.02306 4.26226 4.74336 0.97163 0.47557 1.03813 0.43713 + 27 0.59716 2.50150 1.63951 1.75105 57 a A - - + 1.68144 1.28909 1.43347 
1.20433 + 0.06658 3.11247 3.91592 2.07020 0.13486 1.13318 0.38862 + 28 3.45772 0.15214 3.28331 2.62934 68 c C - - + 1.24691 1.52452 1.28843 1.51786 + 0.06417 3.05117 4.20964 0.68732 0.69901 0.60688 0.78756 + 29 2.54112 0.43659 2.14754 1.84379 71 c c - < + 1.38629 1.38629 1.38629 1.38629 + 0.02064 4.73962 4.44910 1.46634 0.26236 1.17883 0.36765 + 30 1.75760 1.28076 1.06455 1.58555 72 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01767 4.73785 4.73785 1.46634 0.26236 0.83716 0.56729 + 31 2.45757 2.10516 0.32641 2.64477 73 g G - < + 1.38629 1.61608 1.38629 1.19957 + 0.04126 3.46755 4.68520 1.40188 0.28254 1.16513 0.37379 + 32 0.68847 2.43846 1.70166 1.47850 78 a a - < + 1.39953 1.34760 1.39953 1.39953 + 0.04835 4.31604 3.38601 1.01709 0.44886 1.18379 0.36545 + 33 2.02827 2.44383 2.92174 0.31777 80 u u - < + 1.38629 1.38629 1.38629 1.38629 + 0.01803 4.71785 4.71785 1.46634 0.26236 1.72111 0.19707 + 34 3.56015 0.19950 3.88725 2.02556 81 c C - < + 1.35718 1.41983 1.23310 1.56316 + 0.05788 3.17887 4.22665 1.23421 0.34399 1.80037 0.18061 + 35 2.77743 0.33393 2.97305 1.76872 86 c C - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01812 4.71305 4.71305 1.46634 0.26236 1.40817 0.28050 + 36 3.98420 0.10391 3.40107 3.06287 87 C C - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01801 4.71875 4.71875 1.46634 0.26236 1.80037 0.18061 + 37 0.60764 2.71262 1.22750 2.34366 88 a A - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01801 4.71875 4.71875 1.46634 0.26236 1.80037 0.18061 + 38 3.23088 3.18734 3.87694 0.10705 89 u U - _ + 1.38629 1.38629 1.38629 1.38629 + 0.03159 4.71875 3.80896 1.46634 0.26236 1.74175 0.19263 + 39 2.16110 0.61914 2.26925 1.41468 90 c C - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01825 4.70596 4.70596 1.46634 0.26236 1.93795 0.15548 + 40 1.23660 0.86177 2.56859 1.55791 91 c c - _ + 1.77144 0.52247 2.28261 2.00365 + 0.48567 0.98086 4.63299 0.38856 1.13330 1.09154 0.40902 + 41 2.95007 4.22451 0.08802 4.05756 99 G G - > + 1.38629 1.38629 1.38629 1.38629 + 0.01855 4.71928 4.66066 
1.46634 0.26236 1.79035 0.18260 + 42 0.19851 2.48110 3.36603 2.78270 100 a a - > + 1.40690 1.32690 1.40690 1.40690 + 0.02561 4.11282 4.71875 0.87369 0.54027 0.56674 0.83787 + 43 0.56275 2.33321 2.68997 1.32616 102 a A - - + 1.38629 1.38629 1.38629 1.38629 + 0.01763 4.74024 4.74024 1.46634 0.26236 1.28599 0.32349 + 44 3.39375 0.12438 4.17735 2.68773 103 c C - - + 1.38629 1.38629 1.38629 1.38629 + 0.01763 4.74024 4.74024 1.46634 0.26236 0.98476 0.46765 + 45 1.56019 1.81908 2.57774 0.59460 104 u u - > + 1.38629 1.38629 1.38629 1.38629 + 0.01757 4.74330 4.74330 1.46634 0.26236 1.12516 0.39245 + 46 2.56361 0.38721 1.86761 2.41311 105 c C - > + 1.38629 1.38629 1.38629 1.38629 + 0.01756 4.74383 4.74383 1.46634 0.26236 1.16513 0.37379 + 47 1.65610 0.99848 1.23540 1.89738 106 c c - > + 1.39034 1.39806 1.42179 1.33693 + 0.03045 3.84973 4.74383 0.68205 0.70437 0.97754 0.47198 + 48 2.00597 3.06134 0.26182 3.01610 108 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.02119 4.74562 4.39977 1.46634 0.26236 1.09861 0.40547 + 49 0.58998 1.58170 2.46808 1.86247 109 a A - - + 1.46483 1.46483 1.42461 1.21347 + 0.04703 3.29086 4.74203 0.41677 1.07638 0.88179 0.53450 + 50 0.30089 2.06397 2.82571 2.60877 111 a A - - + 1.38629 1.38629 1.38629 1.38629 + 0.01853 4.74562 4.63915 1.46634 0.26236 1.09861 0.40547 + 51 2.65105 3.53770 0.14206 3.41803 112 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01755 4.74465 4.74465 1.46634 0.26236 1.13538 0.38758 + 52 2.25374 1.86995 2.96108 0.37237 113 u u - > + 1.38629 1.38629 1.38629 1.38629 + 0.02119 4.74465 4.40076 1.46634 0.26236 1.13538 0.38758 + 53 2.30451 2.06868 1.68368 0.53077 114 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.01761 4.74107 4.74107 1.46634 0.26236 1.15277 0.37945 + 54 0.07177 3.47955 4.08658 3.83347 115 A A - - + 1.38629 1.38629 1.38629 1.38629 + 0.01760 4.74205 4.74205 1.46634 0.26236 1.22683 0.34703 + 55 0.07030 3.55771 4.11216 3.77171 116 A A - - + 1.35190 1.39803 1.39803 1.39803 + 0.02925 4.35534 4.13613 1.05273 0.42922 1.22683 
0.34703 + 56 1.71014 3.01237 0.32835 2.99827 118 g G - - + 1.85597 1.60694 0.90297 1.43618 + 0.08251 2.66512 4.64522 2.63460 0.07445 0.63738 0.75221 + 57 2.17286 0.32500 2.86949 2.23591 136 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.01855 4.74481 4.63738 1.46634 0.26236 1.03982 0.43621 + 58 1.54608 1.38572 1.03143 1.71325 137 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01755 4.74464 4.74464 1.46634 0.26236 1.02829 0.44257 + 59 1.73179 1.41448 1.67883 0.93296 138 u c - > + 1.38629 1.38629 1.38629 1.38629 + 0.01820 4.74562 4.67277 1.46634 0.26236 1.09861 0.40547 + 60 1.88051 1.48253 0.89529 1.55148 139 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01802 4.74497 4.69262 1.46634 0.26236 1.05064 0.43034 + 61 1.83989 0.89343 1.57228 1.49458 140 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.02062 4.74516 4.44689 1.46634 0.26236 1.06402 0.42322 + 62 1.95525 1.32639 2.27770 0.71229 141 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.01759 4.74260 4.74260 1.46634 0.26236 0.90996 0.51507 + 63 1.17195 1.68298 2.18999 0.93522 142 u u - - + 1.42850 1.37453 1.30763 1.44006 + 0.04330 3.39069 4.74562 0.45444 1.00732 1.09861 0.40547 + 64 2.06137 2.80967 0.29508 2.68794 144 g g - > + 1.39830 1.35112 1.39830 1.39830 + 0.02181 4.35145 4.74562 1.04608 0.43281 1.09861 0.40547 + 65 2.09444 1.16363 0.78407 2.22593 146 g g - > + 1.36526 1.42987 1.32423 1.42987 + 0.02827 3.99693 4.65608 0.93847 0.49631 1.09861 0.40547 + 66 2.69864 3.10988 0.17720 2.98611 149 g g - < + 1.44804 1.49525 1.55952 1.10703 + 0.04988 3.21982 4.74481 0.88506 0.53220 1.03982 0.43621 + 67 2.75316 0.33473 2.45264 2.00485 153 c C - < + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 68 2.10133 1.01237 1.16670 1.59476 154 c c - < + 1.35433 1.34384 1.40173 1.44874 + 0.03127 3.81223 4.74562 0.88206 0.53432 1.09861 0.40547 + 69 1.06718 1.90576 1.25097 1.50920 157 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 70 1.17832 
2.48536 0.67247 2.31807 158 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 71 1.12397 2.13330 1.34852 1.21421 159 a g - < + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 72 1.92362 1.97652 0.76090 1.39381 160 g g - - + 1.38629 1.38629 1.38629 1.38629 + 0.19341 4.74562 1.78878 1.46634 0.26236 1.09861 0.40547 + 73 1.49538 1.94718 2.24044 0.64103 161 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.19486 4.57308 1.79140 1.46634 0.26236 3.05575 0.04823 + 74 0.45457 2.40448 2.17182 1.82635 162 a A - - + 1.25913 1.51334 1.51334 1.28825 + 0.08684 2.64599 4.40301 0.39629 1.11722 0.32782 1.27474 + 75 2.93299 3.01855 0.13952 3.57151 165 g G - - + 1.38629 1.38629 1.38629 1.38629 + 0.01888 4.67199 4.67199 1.46634 0.26236 0.20256 1.69628 + 76 3.99358 2.92642 4.19781 0.09107 166 U U - < + 1.38629 1.38629 1.38629 1.38629 + 0.01754 4.74521 4.74521 1.46634 0.26236 1.11450 0.39761 + 77 0.11617 3.11470 3.66144 3.22913 167 a A - - + 1.38629 1.38629 1.38629 1.38629 + 0.03608 4.74521 3.62152 1.46634 0.26236 1.11450 0.39761 + 78 2.84125 0.35510 1.85871 2.46899 168 c c - < + 1.38629 1.38629 1.38629 1.38629 + 0.01843 4.72699 4.66629 1.46634 0.26236 1.63398 0.21710 + 79 3.31819 2.37733 3.05633 0.19368 169 u u - < + 1.38629 1.38629 1.38629 1.38629 + 0.02382 4.72644 4.22112 1.46634 0.26236 1.34190 0.30293 + 80 1.03409 2.18770 1.15649 1.52470 170 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01792 4.72376 4.72376 1.46634 0.26236 1.06120 0.42471 + 81 1.86872 1.58634 0.86303 1.51811 171 g g - - + 1.38629 1.38629 1.38629 1.38629 + 0.02599 4.73172 4.08388 1.46634 0.26236 1.33266 0.30622 + 82 1.78916 2.52942 0.37772 2.69164 172 g G - - + 1.38629 1.38629 1.38629 1.38629 + 0.14347 4.72552 2.08118 1.46634 0.26236 1.35133 0.29961 + 83 0.89061 2.00611 1.13404 2.01474 173 a a - - + 1.34036 1.40209 1.40209 1.40209 + 0.16534 4.10702 1.99558 1.13868 0.38602 1.47891 0.25862 + 84 2.50495 2.45089 2.96402 0.24783 
176 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.04231 4.50624 3.49387 1.46634 0.26236 0.12430 2.14660 + 85 1.96114 2.03452 0.46600 2.29211 177 g G - < + 1.38629 1.38629 1.38629 1.38629 + 0.01800 4.71951 4.71951 1.46634 0.26236 1.78612 0.18345 + 86 1.96207 1.76997 0.59462 1.98542 178 g g - < + 1.38629 1.38629 1.38629 1.38629 + 0.06564 4.71951 2.90743 1.46634 0.26236 1.68897 0.20421 + 87 2.48356 3.14584 0.23004 2.53801 179 g G - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01885 4.67369 4.67369 1.46634 0.26236 1.45718 0.26513 + 88 1.55291 1.80348 1.69102 0.82254 180 u u - _ + 1.38629 1.38629 1.38629 1.38629 + 0.02185 4.68814 4.38920 1.46634 0.26236 0.91915 0.50892 + 89 1.98709 2.38814 0.43269 2.10089 181 g G - _ + 1.38629 1.38629 1.38629 1.38629 + 0.01940 4.71010 4.58486 1.46634 0.26236 1.66421 0.20991 + 90 0.48856 2.23554 2.05100 1.89081 182 a A - _ + 1.38629 1.38629 1.38629 1.38629 + 0.04725 4.71185 3.29255 1.46634 0.26236 0.71674 0.67010 + 91 2.08360 0.69276 1.61378 1.73617 183 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.02481 4.20562 4.64669 0.95696 0.48460 1.07845 0.41570 + 92 2.58338 0.49017 1.71456 2.02558 185 c C - > + 1.41519 1.37531 1.38981 1.36557 + 0.05835 3.94245 3.28929 0.75530 0.63463 1.29897 0.31857 + 93 1.44648 1.55255 1.47019 1.13006 187 u u - - + 1.38629 1.38629 1.38629 1.38629 + 0.02238 4.69570 4.34314 1.46634 0.26236 0.91028 0.51486 + 94 2.44101 0.55901 2.21621 1.46046 188 c C - - + 1.38629 1.38629 1.38629 1.38629 + 0.01922 4.71416 4.59873 1.46634 0.26236 1.56388 0.23486 + 95 1.61020 0.94090 1.43457 1.76225 189 c c - - + 1.38629 1.38629 1.38629 1.38629 + 0.03993 4.71641 3.50002 1.46634 0.26236 1.58461 0.22945 + 96 2.18875 1.48910 2.33334 0.57023 190 u U - - + 1.38629 1.38629 1.38629 1.38629 + 0.03318 4.69762 3.74983 1.46634 0.26236 1.94079 0.15501 + 97 2.22832 2.76588 0.27337 2.68011 191 g G - - + 1.39934 1.39934 1.39934 1.34815 + 0.03844 4.26291 3.74534 1.02156 0.44634 0.26192 1.46781 + 98 2.23154 2.13562 0.42666 2.10555 193 g g - > + 1.38629 
1.38629 1.38629 1.38629 + 0.01781 4.72977 4.72977 1.46634 0.26236 1.57070 0.23307 + 99 2.25826 3.31778 0.19553 3.30137 194 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01781 4.72977 4.72977 1.46634 0.26236 0.55476 0.85381 + 100 0.12870 3.11387 3.74780 2.94195 195 a A - - + 1.49330 1.35303 1.35303 1.35303 + 0.03095 3.82658 4.74521 1.21784 0.35079 1.11450 0.39761 + 101 0.52168 2.77619 1.31025 2.59762 199 a A - > + 1.42051 1.37646 1.33049 1.42051 + 0.50426 3.27205 1.02688 0.50867 0.91954 1.11450 0.39761 + 102 1.75707 1.65310 1.00247 1.31295 202 g g - - + 1.38629 1.38629 1.38629 1.38629 + 0.02709 4.31530 4.31530 1.46634 0.26236 0.06230 2.80676 + 103 0.98530 1.54075 2.13641 1.22290 203 a a - - + 1.38629 1.38629 1.38629 1.38629 + 0.01763 4.74029 4.74029 1.46634 0.26236 0.80681 0.59109 + 104 1.91716 0.92422 1.32032 1.66554 204 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.01753 4.74562 4.74562 1.46634 0.26236 1.09861 0.40547 + 105 2.25576 0.81533 2.15099 1.08962 205 c c - > + 1.38629 1.38629 1.38629 1.38629 + 0.03272 4.74562 3.75059 1.46634 0.26236 1.09861 0.40547 + 106 1.07692 1.79642 1.75375 1.13836 206 a a - - + 1.47073 1.41139 1.36911 1.30156 + 0.05759 3.16375 4.29051 0.98783 0.46583 0.55355 0.85544 + 107 1.77856 1.36913 0.90724 1.75358 211 g g - > + 1.38629 1.38629 1.38629 1.38629 + 0.01762 4.74081 4.74081 1.46634 0.26236 1.26779 0.33052 + 108 2.13110 2.71915 0.28126 2.80460 212 g G - > + 1.37506 1.46572 1.39000 1.31980 + 0.04786 3.28070 4.69626 0.41342 1.08289 1.26779 0.33052 + 109 1.62498 2.09339 1.94510 0.62205 214 u u - > + 1.67585 0.77934 1.88169 1.60043 + 0.48638 0.97901 4.65939 0.06094 2.82815 0.84034 0.56486 + 110 1.68495 3.43968 0.29793 3.21597 217 g g - ) + 1.40521 1.33160 1.40521 1.40521 + 0.03979 4.17697 3.74368 0.90276 0.51995 1.14102 0.38492 + 111 2.20670 1.02757 2.06911 0.90199 219 u c - ) + 1.38629 1.38629 1.38629 1.38629 + 0.03044 4.72941 3.85623 1.46634 0.26236 0.89025 0.52857 + 112 2.56171 1.14811 2.54620 0.64015 220 u c - ) + 1.22072 1.44819 
1.44819 1.44819 + 0.03025 3.90293 4.64488 1.01008 0.45286 1.57949 0.23077 + 113 2.42825 3.31342 0.17401 3.34863 223 g g - ) + 1.37346 1.36856 1.40182 1.40182 + 0.02545 4.23975 4.53557 0.96752 0.47807 0.78017 0.61309 + 114 2.28462 0.62468 2.31949 1.33017 225 c C - ) + 1.38629 1.38629 1.38629 1.38629 + 0.01814 4.73640 4.68744 1.46634 0.26236 1.26908 0.33002 + 115 0.95503 1.17607 1.96286 1.79422 226 a a - ) + 1.38629 1.38629 1.38629 1.38629 + 0.02255 4.73719 4.30270 1.46634 0.26236 1.31072 0.31420 + 116 1.24426 1.94348 1.35080 1.17245 227 u g - ) + 1.38629 1.38629 1.38629 1.38629 + 0.03289 4.73302 3.74836 1.46634 0.26236 1.49088 0.25512 + 117 1.61067 1.83596 1.13930 1.13712 228 u g - ) + 1.38629 1.38629 1.38629 1.38629 + 0.03308 4.71816 3.74632 1.46634 0.26236 1.68263 0.20565 + 118 1.74114 0.93694 1.54031 1.52079 229 c c - ) + 1.38629 1.38629 1.38629 1.38629 + 0.10437 4.70462 2.40738 1.46634 0.26236 2.03123 0.14061 + 119 1.80143 0.85215 2.05176 1.27320 230 c c - : + 1.38629 1.38629 1.38629 1.38629 + 0.01000 4.61025 * 1.46634 0.26236 0.00000 * +// +HMMER3/f [3.1 | February 2013] +NAME MADE1 +ACC DF0000629.2 +DESC MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon +LENG 80 +MAXL 426 +ALPH DNA +RF yes +MM no +CONS yes +CS no +MAP yes +DATE Tue Feb 19 20:33:41 2013 +NSEQ 1997 +EFFN 3.911818 +CKSUM 3015610723 +STATS LOCAL MSV -8.5786 0.71858 +STATS LOCAL VITERBI -9.3632 0.71858 +STATS LOCAL FORWARD -3.4823 0.71858 +HMM A C G T + m->m m->i m->d i->m i->i d->m d->d + COMPO 1.24257 1.59430 1.62906 1.16413 + 1.38629 1.38629 1.38629 1.38629 + 0.03960 3.94183 3.94183 1.46634 0.26236 0.00000 * + 1 2.69765 2.44396 2.81521 0.24089 1 t x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03960 3.94183 3.94183 1.46634 0.26236 1.09861 0.40547 + 2 2.72939 2.37873 2.85832 0.24244 2 t x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03725 4.00179 4.00179 1.46634 0.26236 1.09861 0.40547 + 3 0.16099 3.16370 2.87328 2.99734 3 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03604 4.03416 
4.03416 1.46634 0.26236 1.09861 0.40547 + 4 1.98862 2.42132 0.42649 2.10770 4 g x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03539 4.05203 4.05203 1.46634 0.26236 1.09861 0.40547 + 5 1.96369 2.69532 0.36534 2.32099 5 g x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03764 4.06427 3.92372 1.46634 0.26236 1.09861 0.40547 + 6 2.56994 2.11239 2.71946 0.30571 6 t x - - + 1.37159 1.41129 1.39124 1.37159 + 0.03806 3.89715 4.07214 1.50442 0.25122 1.00714 0.45454 + 7 2.58388 2.10353 2.64646 0.31253 12 t x - - + 1.38764 1.38524 1.38764 1.38465 + 0.03494 4.03864 4.09125 1.40070 0.28293 1.09237 0.40860 + 8 2.18552 2.70201 0.28821 2.64645 14 g x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03628 4.09157 3.96779 1.46634 0.26236 1.09861 0.40547 + 9 2.16916 2.82142 0.28427 2.60854 15 g x - - + 1.38091 1.39033 1.38365 1.39033 + 0.03566 4.00237 4.08886 1.38021 0.28972 1.01958 0.44745 + 10 2.45517 2.15232 2.42886 0.34277 18 t x - - + 1.39065 1.39065 1.39065 1.37335 + 0.03536 4.01212 4.09576 1.39554 0.28462 1.09775 0.40589 + 11 2.10260 2.95484 0.28160 2.64222 21 g x - - + 1.36740 1.40555 1.40555 1.36740 + 0.03843 3.92069 4.02468 1.44733 0.26814 1.09856 0.40549 + 12 2.54740 0.30185 2.61355 2.21647 26 c x - - + 1.38748 1.38276 1.38748 1.38748 + 0.03457 4.05446 4.09623 1.40847 0.28040 1.05496 0.42803 + 13 0.28443 2.72003 2.32214 2.48149 28 a x - - + 1.38740 1.38740 1.38298 1.38740 + 0.03441 4.05976 4.10001 1.41198 0.27926 1.09780 0.40587 + 14 0.29412 2.55413 2.49679 2.35701 30 a x - - + 1.38194 1.39067 1.38194 1.39067 + 0.03505 4.02482 4.10005 1.39522 0.28473 1.09929 0.40512 + 15 0.18837 2.99710 2.82270 2.77556 33 a x - - + 1.39015 1.39472 1.37503 1.38539 + 0.03725 3.97815 4.02618 1.37955 0.28994 1.10102 0.40426 + 16 0.50816 2.05151 2.22111 1.82407 37 a x - - + 1.36727 1.38730 1.39683 1.39405 + 0.04830 3.89881 3.61610 1.29026 0.32186 1.05306 0.42905 + 17 2.11260 2.73141 0.29747 2.64152 41 g x - - + 1.36913 1.40376 1.40376 1.36913 + 0.03705 3.93681 4.08299 1.44872 0.26771 1.07479 0.41759 + 18 
2.24459 1.90539 2.34054 0.43234 46 t x - - + 1.33632 1.42493 1.39937 1.38665 + 0.04427 3.64574 4.06297 1.70501 0.20061 1.21309 0.35279 + 19 0.44322 2.17202 2.18055 2.03175 57 a x - - + 1.41047 1.41471 1.36338 1.35797 + 0.03970 3.81957 4.07540 1.65588 0.21186 1.22788 0.34660 + 20 0.33340 2.42691 2.40824 2.25160 66 a x - - + 1.29389 1.44615 1.37917 1.43324 + 0.04223 3.70146 4.09459 1.55158 0.23815 1.05880 0.42598 + 21 2.50563 1.98543 2.69601 0.33746 74 t x - - + 1.39462 1.39462 1.42862 1.32990 + 0.04184 3.80216 3.98177 1.80466 0.17976 1.00279 0.45705 + 22 2.54484 1.97505 2.66483 0.33806 84 t x - - + 1.39134 1.39489 1.38662 1.37246 + 0.03877 3.97504 3.95038 1.37620 0.29107 1.13932 0.38572 + 23 2.10159 2.83856 0.29282 2.61635 88 g x - - + 1.39682 1.39682 1.35536 1.39682 + 0.05046 3.75402 3.65808 1.08330 0.41321 1.13019 0.39004 + 24 2.25298 0.61854 2.50691 1.29221 90 c x - - + 1.35803 1.49605 1.46737 1.24379 + 0.06091 3.28322 3.83564 1.89752 0.16245 1.28788 0.32276 + 25 1.27819 2.23285 0.76242 1.91259 106 g x - - + 1.29024 1.67349 1.68279 1.04597 + 0.05752 3.44263 3.73311 2.58671 0.07825 1.26818 0.33037 + 26 1.86925 2.58352 0.39466 2.33986 131 g x - - + 1.31084 1.49412 1.46666 1.29002 + 0.04698 3.54257 4.07715 2.25245 0.11109 0.86163 0.54900 + 27 2.38297 1.93394 2.39162 0.39800 151 t x - - + 1.33582 1.47359 1.44163 1.30411 + 0.04951 3.48445 4.03783 2.15951 0.12260 1.21681 0.35122 + 28 2.41717 2.17810 2.62774 0.32113 170 t x - - + 1.36805 1.48060 1.37439 1.32840 + 0.04849 3.50958 4.05014 2.58370 0.07850 1.22399 0.34822 + 29 2.57764 2.35132 2.56552 0.28512 194 t x - - + 1.43829 1.43458 1.24787 1.43829 + 0.04667 3.56670 4.05428 2.49706 0.08591 1.23744 0.34267 + 30 2.47248 2.07688 2.62257 0.33172 215 t x - - + 1.25120 1.52623 1.70635 1.15531 + 0.08932 3.31524 3.01336 2.81842 0.06156 1.22909 0.34610 + 31 2.25937 2.13157 2.02027 0.43957 248 t x - - + 1.18172 1.43522 1.72841 1.28150 + 0.07936 2.93117 3.77395 2.46269 0.08906 0.60457 0.79034 + 32 2.04508 2.84981 0.30490 2.58263 
280 g x - - + 1.17665 1.66785 1.66218 1.16056 + 0.05998 3.23615 3.96853 2.83684 0.06040 1.01952 0.44749 + 33 2.45103 0.38098 2.56776 1.87147 317 c x - - + 1.24153 1.52524 1.60663 1.22783 + 0.05538 3.39046 3.90294 2.73920 0.06680 1.18729 0.36391 + 34 2.22082 0.36258 2.75077 2.02704 347 c x - - + 1.15008 1.62014 1.86511 1.10673 + 0.06086 3.18178 4.04341 2.94504 0.05403 1.25991 0.33363 + 35 0.27033 2.66664 2.52541 2.43767 388 a x - - + 1.24951 1.47565 1.41392 1.42074 + 0.07123 3.00373 3.95552 3.13655 0.04440 1.28173 0.32512 + 36 2.83107 2.41670 2.97197 0.22235 439 t x - - + 1.37071 1.57683 1.38637 1.23972 + 0.05293 3.45216 3.91807 2.54402 0.08181 1.14651 0.38235 + 37 2.52322 2.25084 2.45909 0.31611 465 t x - - + 1.26335 1.55077 1.59008 1.19965 + 0.07504 3.13329 3.55006 3.08962 0.04659 1.13108 0.38962 + 38 0.45807 2.30687 1.98940 2.03143 512 a x - - + 1.15472 1.67511 1.53797 1.26320 + 0.09820 3.13076 2.99876 2.79197 0.06326 1.39915 0.28343 + 39 2.37471 0.42180 2.44763 1.80427 550 c x - - + 1.23785 1.49058 1.48364 1.35502 + 0.06081 3.19472 4.01643 2.41851 0.09327 0.94671 0.49105 + 40 2.32826 1.95481 2.36781 0.40458 578 t x - - + 1.36586 1.46001 1.43000 1.29720 + 0.05257 3.39673 4.03256 1.84862 0.17133 1.40979 0.27997 + 41 2.68669 2.13935 2.81520 0.28200 592 t x - - + 1.34965 1.42793 1.45781 1.31633 + 0.04735 3.57826 3.99988 2.09424 0.13144 1.22129 0.34934 + 42 2.55904 2.16444 2.70859 0.29952 609 t x - - + 1.12072 1.61936 1.63578 1.26895 + 0.07346 3.25910 3.42962 2.85641 0.05919 1.38363 0.28857 + 43 1.99923 1.61027 2.26343 0.57851 646 t x - - + 1.32290 1.58747 1.61095 1.11018 + 0.06656 3.08568 3.97944 2.44774 0.09046 0.75593 0.63407 + 44 0.23887 2.79899 2.55209 2.60783 675 a x - - + 1.18557 1.50323 1.59070 1.31590 + 0.05597 3.38637 3.88222 2.46900 0.08847 1.27945 0.32599 + 45 0.29593 2.53488 2.53903 2.32335 701 a x - - + 1.08710 1.54222 1.59276 1.40430 + 0.07539 2.94521 3.91062 1.91623 0.15918 1.22327 0.34852 + 46 2.58352 2.40524 2.76700 0.25955 725 t x - - + 1.19685 
1.58503 1.74852 1.14293 + 0.06124 3.18279 4.02089 2.82961 0.06085 1.05474 0.42814 + 47 2.13251 2.88788 0.29508 2.50964 764 g x - - + 1.20891 1.55463 1.68206 1.19000 + 0.06526 3.12574 3.94910 2.41448 0.09367 1.10396 0.40280 + 48 2.23841 2.99164 0.25118 2.72900 792 g x - - + 1.26330 1.55339 1.52606 1.24355 + 0.05464 3.34968 4.01313 2.78872 0.06347 1.15133 0.38012 + 49 2.57533 0.32900 2.64632 2.01501 824 c x - - + 1.35118 1.39828 1.40141 1.39516 + 0.04340 3.79297 3.91506 1.59549 0.22666 1.20075 0.35806 + 50 0.46433 2.04127 2.23437 2.00605 833 a x - - + 1.23062 1.36903 1.62282 1.36182 + 0.05764 3.31530 3.92762 2.28791 0.10700 1.07910 0.41536 + 51 0.27513 2.77017 2.28518 2.57549 853 a x - - + 1.27958 1.58726 1.46109 1.25394 + 0.05750 3.30072 3.96214 2.60776 0.07656 1.25708 0.33475 + 52 0.20149 2.86434 2.84551 2.69770 883 a x - - + 1.23645 1.62259 1.71174 1.10368 + 0.05756 3.26729 4.02702 2.54508 0.08172 1.27391 0.32814 + 53 0.26982 2.65833 2.50477 2.46835 911 a x - - + 1.36005 1.50358 1.48100 1.22550 + 0.06921 3.37553 3.42118 2.36646 0.09851 1.27560 0.32748 + 54 0.40022 2.19284 2.22687 2.20396 934 a x - - + 1.12070 1.60472 1.53213 1.35895 + 0.05523 3.36752 3.94966 2.42917 0.09224 0.84774 0.55928 + 55 2.11356 0.46400 2.46442 1.79955 960 c x - - + 1.23932 1.35913 1.50478 1.46331 + 0.05187 3.47055 3.94022 2.35854 0.09933 1.12102 0.39445 + 56 1.85868 0.79440 2.22069 1.25971 983 c x - - + 1.21951 1.50212 1.51138 1.34185 + 0.06404 3.29054 3.69705 1.75742 0.18933 1.18410 0.36532 + 57 1.33272 2.32720 0.71452 1.90215 999 g x - - + 1.12229 1.49343 1.56653 1.42255 + 0.04920 3.46654 4.08749 2.17995 0.11996 1.31769 0.31164 + 58 2.48337 0.43652 2.46331 1.68683 1017 c x - - + 1.34704 1.55461 1.38112 1.28222 + 0.04823 3.61532 3.90311 2.20911 0.11631 1.00864 0.45368 + 59 0.41659 2.44509 1.93972 2.20507 1034 a x - - + 1.38198 1.38198 1.39194 1.38932 + 0.03641 3.98130 4.06929 1.35873 0.29704 1.31330 0.31325 + 60 0.41612 2.39160 1.97116 2.21075 1037 a x - - + 1.03649 1.46430 1.57421 
1.57557 + 0.04769 3.52580 4.06641 2.32461 0.10294 0.84329 0.56263 + 61 2.66264 2.12302 2.82746 0.28581 1056 t x - - + 1.36925 1.39635 1.38930 1.39048 + 0.04097 3.97400 3.84718 1.39433 0.28502 1.12205 0.39395 + 62 2.26510 2.13196 2.42551 0.37231 1060 t x - - + 1.37965 1.39147 1.39147 1.38264 + 0.04082 3.91610 3.90805 1.24613 0.33914 0.95192 0.48776 + 63 0.41244 2.25761 2.16787 2.12907 1062 a x - - + 1.34515 1.41203 1.41203 1.37753 + 0.04054 3.77835 4.08203 1.30483 0.31638 1.11819 0.39582 + 64 2.51464 0.37905 2.62296 1.82008 1068 c x - - + 1.39543 1.38753 1.39233 1.37008 + 0.03854 3.90584 4.03535 1.36573 0.29463 1.13682 0.38689 + 65 2.16380 2.11332 2.18714 0.42765 1073 t x - - + 1.38764 1.38471 1.38519 1.38764 + 0.03575 4.05376 4.03073 1.40080 0.28289 1.03825 0.43707 + 66 2.79349 2.39141 2.87271 0.23478 1075 t x - - + 1.37227 1.39101 1.39101 1.39101 + 0.03597 4.01447 4.05827 1.39017 0.28639 1.06429 0.42308 + 67 2.82488 2.47749 2.93179 0.21887 1078 t x - - + 1.38141 1.39112 1.38915 1.38353 + 0.03661 3.99477 4.04370 1.35958 0.29675 1.13439 0.38804 + 68 2.77679 2.30433 2.90694 0.24425 1081 t x - - + 1.37593 1.38989 1.45520 1.32825 + 0.04447 3.68736 3.99242 1.76176 0.18843 0.98580 0.46703 + 69 2.47698 3.17398 0.19595 2.95437 1093 g x - - + 1.38264 1.38264 1.39734 1.38264 + 0.05358 3.96553 3.40487 1.40348 0.28202 1.03112 0.44100 + 70 2.84327 0.27906 2.97336 2.00890 1097 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03412 4.08811 4.08811 1.46634 0.26236 0.69006 0.69625 + 71 0.21870 2.83638 2.69251 2.65798 1098 a x - - + 1.37446 1.37942 1.39640 1.39509 + 0.03670 3.93983 4.09935 1.41905 0.27700 1.10002 0.40476 + 72 2.35233 0.46085 2.23804 1.78715 1103 c x - - + 1.38536 1.38781 1.38781 1.38421 + 0.03493 4.03822 4.09272 1.39310 0.28542 1.09638 0.40658 + 73 2.57111 0.32543 2.74124 1.98892 1105 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03381 4.09688 4.09688 1.46634 0.26236 1.09626 0.40664 + 74 0.27014 2.61416 2.53262 2.47636 1106 a x - - + 1.38629 1.38629 1.38629 1.38629 + 
0.03461 4.09267 4.05587 1.46634 0.26236 1.09748 0.40603 + 75 0.52873 2.16549 1.91736 1.90409 1107 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03426 4.08396 4.08396 1.46634 0.26236 1.07423 0.41788 + 76 2.33134 0.38082 2.65861 1.90055 1108 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03466 4.07266 4.07266 1.46634 0.26236 1.09861 0.40547 + 77 2.20588 0.45134 2.35553 1.84373 1109 c x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03550 4.04912 4.04912 1.46634 0.26236 1.09861 0.40547 + 78 2.69018 2.22054 2.82311 0.26898 1110 t x - - + 1.38629 1.38629 1.38629 1.38629 + 0.03711 4.00561 4.00561 1.46634 0.26236 1.09861 0.40547 + 79 0.16248 3.15867 2.86159 2.98963 1111 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.04048 3.92018 3.92018 1.46634 0.26236 1.09861 0.40547 + 80 0.17484 3.04770 2.86638 2.88183 1112 a x - - + 1.38629 1.38629 1.38629 1.38629 + 0.02045 3.90014 * 1.46634 0.26236 0.00000 * +// diff --git a/q2_types/hmmer/tests/test_format.py b/q2_types/hmmer/tests/test_format.py index 045dbf04..ca0c4803 100644 --- a/q2_types/hmmer/tests/test_format.py +++ b/q2_types/hmmer/tests/test_format.py @@ -11,7 +11,8 @@ from qiime2.plugin.testing import TestPluginBase from q2_types.hmmer._format import ( HmmIdmapFileFmt, BaseHmmPressedDirFmt, AminoHmmFileFmt, DnaHmmFileFmt, - # RnaHmmFileFmt + RnaHmmFileFmt, AminoHmmMultipleProfilesFileFmt, + DnaHmmMultipleProfilesFileFmt, RnaHmmMultipleProfilesFileFmt ) from qiime2.plugin import ValidationError @@ -101,6 +102,73 @@ def test_DnaHmmFileFmt_valid(self): fmt = DnaHmmFileFmt(self.get_data_path("hmms/dna.hmm"), "r") fmt.validate() - # def test_RnaHmmFileFmt_valid(self): - # fmt = RnaHmmFileFmt(self.get_data_path("hmms/rna.hmm"), "r") - # fmt.validate() + def test_RnaHmmFileFmt_valid(self): + fmt = RnaHmmFileFmt(self.get_data_path("hmms/rna.hmm"), "r") + fmt.validate() + + def test_AminoHmmFileFmt_invalid_alph(self): + for type in ["rna", "dna"]: + fmt = AminoHmmFileFmt(self.get_data_path(f"hmms/{type}.hmm"), "r") + with 
self.assertRaisesRegex( + ValidationError, "Found profile with alphabet " + ): + fmt.validate() + + def test_DnaHmmFileFmt_invalid_alph(self): + for type in ["rna", "amino"]: + fmt = DnaHmmFileFmt(self.get_data_path(f"hmms/{type}.hmm"), "r") + with self.assertRaisesRegex( + ValidationError, "Found profile with alphabet " + ): + fmt.validate() + + def test_RnaHmmFileFmt_invalid_alph(self): + for type in ["dna", "amino"]: + fmt = RnaHmmFileFmt(self.get_data_path(f"hmms/{type}.hmm"), "r") + with self.assertRaisesRegex( + ValidationError, "Found profile with alphabet " + ): + fmt.validate() + + def test_AminoHmmFileFmt_too_many_profiles(self): + fmt = AminoHmmFileFmt(self.get_data_path("hmms/4_amino.hmm"), "r") + with self.assertRaisesRegex( + ValidationError, "Expected 1 profile, found 4." + ): + fmt.validate() + + def test_AminoHmmMultipleProfilesFileFmt_valid(self): + fmt = AminoHmmMultipleProfilesFileFmt( + self.get_data_path("hmms/4_amino.hmm"), 'r' + ) + fmt.validate() + + def test_DnaHmmMultipleProfilesFileFmt_valid(self): + fmt = DnaHmmMultipleProfilesFileFmt( + self.get_data_path("hmms/2_dna.hmm"), "r" + ) + fmt.validate() + + def test_RnaHmmMultipleProfilesFileFmt_valid(self): + fmt = RnaHmmMultipleProfilesFileFmt( + self.get_data_path("hmms/2_rna.hmm"), "r" + ) + fmt.validate() + + def test_mixed_hmm_profiles_invalid_1(self): + fmt = AminoHmmMultipleProfilesFileFmt( + self.get_data_path("hmms/amino_dna.hmm"), 'r' + ) + with self.assertRaisesRegex( + ValidationError, "Found profiles with different alphabets." + ): + fmt.validate() + + def test_mixed_hmm_profiles_invalid_2(self): + fmt = DnaHmmMultipleProfilesFileFmt( + self.get_data_path("hmms/rna_dna.hmm"), 'r' + ) + with self.assertRaisesRegex( + ValidationError, "Found profiles with different alphabets." 
+ ): + fmt.validate() From d26f2781b5069fd401b18274e874e2bd3ab78daa Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Tue, 4 Jun 2024 12:19:13 +0200 Subject: [PATCH 18/28] add pyhmmer to dependencies --- ci/recipe/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/recipe/meta.yaml b/ci/recipe/meta.yaml index ec0180d6..28d5980f 100644 --- a/ci/recipe/meta.yaml +++ b/ci/recipe/meta.yaml @@ -26,6 +26,7 @@ requirements: - h5py - qiime2 {{ qiime2_epoch }}.* - samtools + - pyhmmer test: commands: From 69d2a810eb54c014e645f6de8df92d500eb9d554 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Mon, 10 Jun 2024 14:41:59 +0200 Subject: [PATCH 19/28] renaming stuff --- q2_types/__init__.py | 2 +- q2_types/hmmer/__init__.py | 38 ---- q2_types/hmmer/_type.py | 135 -------------- .../tests/data/bacteria/bacteria.hmm.idmap | 19 -- .../tests/data/invalid_idmaps/1.hmm.idmap | 19 -- .../tests/data/invalid_idmaps/2.hmm.idmap | 19 -- .../tests/data/invalid_idmaps/3.hmm.idmap | 19 -- .../tests/data/invalid_idmaps/4.hmm.idmap | 19 -- q2_types/hmmer/tests/test_format.py | 174 ------------------ q2_types/profile_hmms/__init__.py | 42 +++++ q2_types/{hmmer => profile_hmms}/_format.py | 64 ++----- q2_types/profile_hmms/_type.py | 134 ++++++++++++++ .../{hmmer => profile_hmms}/tests/__init__.py | 0 .../tests/data/bacteria/bacteria.hmm.h3f | 0 .../tests/data/bacteria/bacteria.hmm.h3i | 0 .../tests/data/bacteria/bacteria.hmm.h3m | 0 .../tests/data/bacteria/bacteria.hmm.h3p | 0 .../tests/data/hmms/2_dna.hmm | 0 .../tests/data/hmms/2_rna.hmm | 0 .../tests/data/hmms/4_amino.hmm | 0 .../tests/data/hmms/amino.hmm | 0 .../tests/data/hmms/amino_dna.hmm | 0 .../tests/data/hmms/dna.hmm | 0 .../tests/data/hmms/rna.hmm | 0 .../tests/data/hmms/rna_dna.hmm | 0 q2_types/profile_hmms/tests/test_format.py | 103 +++++++++++ .../tests/test_type.py | 35 ++-- q2_types/reference_db/tests/test_format.py | 10 +- 28 files changed, 319 insertions(+), 513 deletions(-) delete mode 100644 
q2_types/hmmer/__init__.py delete mode 100644 q2_types/hmmer/_type.py delete mode 100644 q2_types/hmmer/tests/data/bacteria/bacteria.hmm.idmap delete mode 100644 q2_types/hmmer/tests/data/invalid_idmaps/1.hmm.idmap delete mode 100644 q2_types/hmmer/tests/data/invalid_idmaps/2.hmm.idmap delete mode 100644 q2_types/hmmer/tests/data/invalid_idmaps/3.hmm.idmap delete mode 100644 q2_types/hmmer/tests/data/invalid_idmaps/4.hmm.idmap delete mode 100644 q2_types/hmmer/tests/test_format.py create mode 100644 q2_types/profile_hmms/__init__.py rename q2_types/{hmmer => profile_hmms}/_format.py (67%) create mode 100644 q2_types/profile_hmms/_type.py rename q2_types/{hmmer => profile_hmms}/tests/__init__.py (100%) rename q2_types/{hmmer => profile_hmms}/tests/data/bacteria/bacteria.hmm.h3f (100%) rename q2_types/{hmmer => profile_hmms}/tests/data/bacteria/bacteria.hmm.h3i (100%) rename q2_types/{hmmer => profile_hmms}/tests/data/bacteria/bacteria.hmm.h3m (100%) rename q2_types/{hmmer => profile_hmms}/tests/data/bacteria/bacteria.hmm.h3p (100%) rename q2_types/{hmmer => profile_hmms}/tests/data/hmms/2_dna.hmm (100%) rename q2_types/{hmmer => profile_hmms}/tests/data/hmms/2_rna.hmm (100%) rename q2_types/{hmmer => profile_hmms}/tests/data/hmms/4_amino.hmm (100%) rename q2_types/{hmmer => profile_hmms}/tests/data/hmms/amino.hmm (100%) rename q2_types/{hmmer => profile_hmms}/tests/data/hmms/amino_dna.hmm (100%) rename q2_types/{hmmer => profile_hmms}/tests/data/hmms/dna.hmm (100%) rename q2_types/{hmmer => profile_hmms}/tests/data/hmms/rna.hmm (100%) rename q2_types/{hmmer => profile_hmms}/tests/data/hmms/rna_dna.hmm (100%) create mode 100644 q2_types/profile_hmms/tests/test_format.py rename q2_types/{hmmer => profile_hmms}/tests/test_type.py (64%) diff --git a/q2_types/__init__.py b/q2_types/__init__.py index d78d5d56..8cb98d85 100644 --- a/q2_types/__init__.py +++ b/q2_types/__init__.py @@ -31,4 +31,4 @@ importlib.import_module('q2_types.genome_data') 
importlib.import_module('q2_types.kaiju') importlib.import_module('q2_types.reference_db') -importlib.import_module('q2_types.hmmer') +importlib.import_module('q2_types.profile_hmms') diff --git a/q2_types/hmmer/__init__.py b/q2_types/hmmer/__init__.py deleted file mode 100644 index 5e35863a..00000000 --- a/q2_types/hmmer/__init__.py +++ /dev/null @@ -1,38 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2023, QIIME 2 development team. -# -# Distributed under the terms of the Modified BSD License. -# -# The full license is in the file LICENSE, distributed with this software. -# ---------------------------------------------------------------------------- -from ._format import ( - AminoHmmMultipleProfilesFileFmt, - DnaHmmMultipleProfilesFileFmt, - RnaHmmMultipleProfilesFileFmt, - AminoHmmMultipleProfilesDirectoryFormat, - DnaHmmMultipleProfilesDirectoryFormat, - RnaHmmMultipleProfilesDirectoryFormat, - AminoHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt, - AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat, - BaseHmmPressedDirFmt -) -from ._type import ( - HMM, - SingleAmino, SingleDNA, SingleRNA, - MultipleAmino, MultipleDNA, MultipleRNA, - MultipleAminoPressed, MultipleDNAPressed, MultipleRNAPressed -) - -__all__ = [ - "AminoHmmMultipleProfilesFileFmt", "DnaHmmMultipleProfilesFileFmt", - "RnaHmmMultipleProfilesFileFmt", "AminoHmmMultipleProfilesDirectoryFormat", - "DnaHmmMultipleProfilesDirectoryFormat", - "RnaHmmMultipleProfilesDirectoryFormat", - "AminoHmmFileFmt", "DnaHmmFileFmt", "RnaHmmFileFmt", - "AminoHmmDirectoryFormat", "DnaHmmDirectoryFormat", - "RnaHmmDirectoryFormat", "HMM", - "SingleAmino", "SingleDNA", "SingleRNA", - "MultipleAmino", "MultipleDNA", "MultipleRNA", - "MultipleAminoPressed", "MultipleDNAPressed", "MultipleRNAPressed", - "BaseHmmPressedDirFmt" -] diff --git a/q2_types/hmmer/_type.py b/q2_types/hmmer/_type.py deleted file mode 100644 index 1714dacd..00000000 --- 
a/q2_types/hmmer/_type.py +++ /dev/null @@ -1,135 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2023, QIIME 2 development team. -# -# Distributed under the terms of the Modified BSD License. -# -# The full license is in the file LICENSE, distributed with this software. -# ---------------------------------------------------------------------------- -from qiime2.plugin import SemanticType -from q2_types.plugin_setup import plugin -from q2_types.hmmer._format import ( - AminoHmmMultipleProfilesDirectoryFormat, - DnaHmmMultipleProfilesDirectoryFormat, - RnaHmmMultipleProfilesDirectoryFormat, - AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat, - BaseHmmPressedDirFmt -) - - -HMM = SemanticType('HMM', field_names='type') -SingleAmino = SemanticType( - 'SingleAmino', variant_of=HMM.field['type'] -) -SingleDNA = SemanticType( - 'SingleDNA', variant_of=HMM.field['type'] -) -SingleRNA = SemanticType( - 'SingleRNA', variant_of=HMM.field['type'] -) -MultipleAmino = SemanticType( - 'MultipleAmino', variant_of=HMM.field['type'] -) -MultipleDNA = SemanticType( - 'MultipleDNA', variant_of=HMM.field['type'] -) -MultipleRNA = SemanticType( - 'MultipleRNA', variant_of=HMM.field['type'] -) -MultipleAminoPressed = SemanticType( - 'MultipleAminoPressed', variant_of=HMM.field['type'] -) -MultipleDNAPressed = SemanticType( - 'MultipleDNAPressed', variant_of=HMM.field['type'] -) -MultipleRNAPressed = SemanticType( - 'MultipleRNAPressed', variant_of=HMM.field['type'] -) - -plugin.register_semantic_types( - HMM, - SingleAmino, SingleDNA, SingleRNA, - MultipleAmino, MultipleDNA, MultipleRNA, - MultipleAminoPressed, MultipleDNAPressed, - MultipleRNAPressed -) - -plugin.register_artifact_class( - HMM[MultipleAminoPressed], - directory_format=BaseHmmPressedDirFmt, - description=( - "A collection of Hidden Markov Model profiles for amino acid " - "sequences in binary format and indexed." 
- ) -) - -plugin.register_artifact_class( - HMM[MultipleDNAPressed], - directory_format=BaseHmmPressedDirFmt, - description=( - "A collection of Hidden Markov Model profiles for DNA " - "sequences in binary format and indexed." - ) -) - -plugin.register_artifact_class( - HMM[MultipleRNAPressed], - directory_format=BaseHmmPressedDirFmt, - description=( - "A collection of Hidden Markov Model profiles for RNA " - "sequences in binary format and indexed." - ) -) - -plugin.register_artifact_class( - HMM[SingleAmino], - directory_format=AminoHmmDirectoryFormat, - description=( - "One single Hidden Markov Model profile, representing a group " - "of related proteins." - ) -) - -plugin.register_artifact_class( - HMM[SingleDNA], - directory_format=DnaHmmDirectoryFormat, - description=( - "One single Hidden Markov Model profile, representing a group " - "of related DNA sequences." - ) -) - -plugin.register_artifact_class( - HMM[SingleRNA], - directory_format=RnaHmmDirectoryFormat, - description=( - "One single Hidden Markov Model profile, representing a group " - "of related RNA sequences." - ) -) - -plugin.register_artifact_class( - HMM[MultipleAmino], - directory_format=AminoHmmMultipleProfilesDirectoryFormat, - description=( - "A collection of Hidden Markov Model profiles, each representing a " - "group of related proteins." - ) -) - -plugin.register_artifact_class( - HMM[MultipleDNA], - directory_format=DnaHmmMultipleProfilesDirectoryFormat, - description=( - "A collection of Hidden Markov Model profiles, each representing a " - "group of related DNA sequences." - ) -) - -plugin.register_artifact_class( - HMM[MultipleRNA], - directory_format=RnaHmmMultipleProfilesDirectoryFormat, - description=( - "A collection of Hidden Markov Model profiles, each representing a " - "group of related RNA sequences." 
- ) -) diff --git a/q2_types/hmmer/tests/data/bacteria/bacteria.hmm.idmap b/q2_types/hmmer/tests/data/bacteria/bacteria.hmm.idmap deleted file mode 100644 index 4e7e0050..00000000 --- a/q2_types/hmmer/tests/data/bacteria/bacteria.hmm.idmap +++ /dev/null @@ -1,19 +0,0 @@ -1 1FKAT -2 1FIZK -3 1FIY1 -4 1FKA5 -5 1FIYP -6 1FK7D -7 1FIX5 -8 1FKCK -9 1FIXT -10 1FKBX -11 1FIYG -12 1FKAC -13 1FKB9 -14 1FK72 -15 1FK4H -16 1FK7S -17 1FK66 -18 1FK6W -19 1FIXC \ No newline at end of file diff --git a/q2_types/hmmer/tests/data/invalid_idmaps/1.hmm.idmap b/q2_types/hmmer/tests/data/invalid_idmaps/1.hmm.idmap deleted file mode 100644 index d8f7bc5c..00000000 --- a/q2_types/hmmer/tests/data/invalid_idmaps/1.hmm.idmap +++ /dev/null @@ -1,19 +0,0 @@ -1 1FKAT:"%#@ -2 1FIZK -3 1FIY1 -4 1FKA5 -5 1FIYP -6 1FK7D -7 1FIX5 -8 1FKCK -9 1FIXT -10 1FKBX -11 1FIYG -12 1FKAC -13 1FKB9 -14 1FK72 -15 1FK4H -16 1FK7S -17 1FK66 -18 1FK6W -19 1FIXC \ No newline at end of file diff --git a/q2_types/hmmer/tests/data/invalid_idmaps/2.hmm.idmap b/q2_types/hmmer/tests/data/invalid_idmaps/2.hmm.idmap deleted file mode 100644 index ea1cb3ba..00000000 --- a/q2_types/hmmer/tests/data/invalid_idmaps/2.hmm.idmap +++ /dev/null @@ -1,19 +0,0 @@ -1 1FKAT -2 1FIZK -3 1FIY1 -4 1FKA5 -5 1FIYP -6 1FK7D -7 1FIX5 -8 1FKCK -9 1FIXT -10 1FKBX -11 1FIYG -12 1FKAC -13 1FKB9 -14 1FK72 -15 1FK4H -16 1FK7S -17 1FK66 -18 1FK6W -19 1FIXC \ No newline at end of file diff --git a/q2_types/hmmer/tests/data/invalid_idmaps/3.hmm.idmap b/q2_types/hmmer/tests/data/invalid_idmaps/3.hmm.idmap deleted file mode 100644 index 1c28be88..00000000 --- a/q2_types/hmmer/tests/data/invalid_idmaps/3.hmm.idmap +++ /dev/null @@ -1,19 +0,0 @@ -1 1FKAT -2 1FIZK -3 1FIY1 -4 1FKA5 -5 1FIYP -6 1FK7D -7 1FIX5 -8 1FKCK -9 1FIXT -10 1FKBX -11 1FIYG -12 1FKAC -13 1FKB9 -14 1FK72 -15 1FK4H -16 1FK7S -17 1FK66 -18 1FK6W -20 1FIXC \ No newline at end of file diff --git a/q2_types/hmmer/tests/data/invalid_idmaps/4.hmm.idmap 
b/q2_types/hmmer/tests/data/invalid_idmaps/4.hmm.idmap deleted file mode 100644 index 260b29b8..00000000 --- a/q2_types/hmmer/tests/data/invalid_idmaps/4.hmm.idmap +++ /dev/null @@ -1,19 +0,0 @@ -1FKAT -2 1FIZK -3 1FIY1 -4 1FKA5 -5 1FIYP -6 1FK7D -7 1FIX5 -8 1FKCK -9 1FIXT -10 1FKBX -11 1FIYG -12 1FKAC -13 1FKB9 -14 1FK72 -15 1FK4H -16 1FK7S -17 1FK66 -18 1FK6W -19 1FIXC \ No newline at end of file diff --git a/q2_types/hmmer/tests/test_format.py b/q2_types/hmmer/tests/test_format.py deleted file mode 100644 index ca0c4803..00000000 --- a/q2_types/hmmer/tests/test_format.py +++ /dev/null @@ -1,174 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2023, QIIME 2 development team. -# -# Distributed under the terms of the Modified BSD License. -# -# The full license is in the file LICENSE, distributed with this software. -# ---------------------------------------------------------------------------- -import tempfile -import shutil -import os -from qiime2.plugin.testing import TestPluginBase -from q2_types.hmmer._format import ( - HmmIdmapFileFmt, BaseHmmPressedDirFmt, AminoHmmFileFmt, DnaHmmFileFmt, - RnaHmmFileFmt, AminoHmmMultipleProfilesFileFmt, - DnaHmmMultipleProfilesFileFmt, RnaHmmMultipleProfilesFileFmt -) -from qiime2.plugin import ValidationError - - -class TestHmmFormats(TestPluginBase): - package = 'q2_types.hmmer.tests' - - def test_HmmIdmapFileFmt_valid(self): - fmt = HmmIdmapFileFmt( - self.get_data_path("bacteria/bacteria.hmm.idmap"), 'r' - ) - fmt.validate() - - def test_HmmIdmapFileFmt_invalid_idmap_1(self): - fmt = HmmIdmapFileFmt( - self.get_data_path("invalid_idmaps/1.hmm.idmap"), 'r' - ) - with self.assertRaisesRegex( - ValidationError, - "Expected index and an alphanumeric code separated " - "by a single space." 
- ): - fmt.validate(level="min") - - def test_HmmIdmapFileFmt_invalid_idmap_2(self): - fmt = HmmIdmapFileFmt( - self.get_data_path("invalid_idmaps/2.hmm.idmap"), 'r' - ) - with self.assertRaisesRegex( - ValidationError, - "Expected index and an alphanumeric code separated " - "by a single space." - ): - fmt.validate(level="min") - - def test_HmmIdmapFileFmt_invalid_idmap_3(self): - fmt = HmmIdmapFileFmt( - self.get_data_path("invalid_idmaps/3.hmm.idmap"), 'r' - ) - with self.assertRaisesRegex( - ValidationError, - 'Expected index' - ): - fmt.validate(level="min") - - def test_HmmIdmapFileFmt_invalid_idmap_4(self): - fmt = HmmIdmapFileFmt( - self.get_data_path("invalid_idmaps/4.hmm.idmap"), 'r' - ) - with self.assertRaisesRegex( - ValidationError, - "Expected index and an alphanumeric code separated " - "by a single space." - ): - fmt.validate(level="min") - - def test_BaseHmmPressedDirFmt_missing_hmm(self): - with tempfile.TemporaryDirectory() as tmp: - shutil.copytree( - self.get_data_path("bacteria"), tmp, dirs_exist_ok=True - ) - os.remove(f"{tmp}/bacteria.hmm.h3f") - fmt = BaseHmmPressedDirFmt(tmp, 'r') - with self.assertRaisesRegex( - ValidationError, "Missing one or more files" - ): - fmt.validate(level="min") - - def test_BaseHmmPressedDirFmt_missing_idmap_ok(self): - with tempfile.TemporaryDirectory() as tmp: - shutil.copytree( - self.get_data_path("bacteria"), tmp, dirs_exist_ok=True - ) - os.remove(f"{tmp}/bacteria.hmm.idmap") - fmt = BaseHmmPressedDirFmt(tmp, 'r') - fmt.validate(level="min") - - def test_BaseHmmPressedDirFmt_valid(self): - fmt = BaseHmmPressedDirFmt(self.get_data_path("bacteria"), 'r') - fmt.validate(level="min") - - def test_AminoHmmFileFmt_valid(self): - fmt = AminoHmmFileFmt(self.get_data_path("hmms/amino.hmm"), "r") - fmt.validate() - - def test_DnaHmmFileFmt_valid(self): - fmt = DnaHmmFileFmt(self.get_data_path("hmms/dna.hmm"), "r") - fmt.validate() - - def test_RnaHmmFileFmt_valid(self): - fmt = 
RnaHmmFileFmt(self.get_data_path("hmms/rna.hmm"), "r") - fmt.validate() - - def test_AminoHmmFileFmt_invalid_alph(self): - for type in ["rna", "dna"]: - fmt = AminoHmmFileFmt(self.get_data_path(f"hmms/{type}.hmm"), "r") - with self.assertRaisesRegex( - ValidationError, "Found profile with alphabet " - ): - fmt.validate() - - def test_DnaHmmFileFmt_invalid_alph(self): - for type in ["rna", "amino"]: - fmt = DnaHmmFileFmt(self.get_data_path(f"hmms/{type}.hmm"), "r") - with self.assertRaisesRegex( - ValidationError, "Found profile with alphabet " - ): - fmt.validate() - - def test_RnaHmmFileFmt_invalid_alph(self): - for type in ["dna", "amino"]: - fmt = RnaHmmFileFmt(self.get_data_path(f"hmms/{type}.hmm"), "r") - with self.assertRaisesRegex( - ValidationError, "Found profile with alphabet " - ): - fmt.validate() - - def test_AminoHmmFileFmt_too_many_profiles(self): - fmt = AminoHmmFileFmt(self.get_data_path("hmms/4_amino.hmm"), "r") - with self.assertRaisesRegex( - ValidationError, "Expected 1 profile, found 4." - ): - fmt.validate() - - def test_AminoHmmMultipleProfilesFileFmt_valid(self): - fmt = AminoHmmMultipleProfilesFileFmt( - self.get_data_path("hmms/4_amino.hmm"), 'r' - ) - fmt.validate() - - def test_DnaHmmMultipleProfilesFileFmt_valid(self): - fmt = DnaHmmMultipleProfilesFileFmt( - self.get_data_path("hmms/2_dna.hmm"), "r" - ) - fmt.validate() - - def test_RnaHmmMultipleProfilesFileFmt_valid(self): - fmt = RnaHmmMultipleProfilesFileFmt( - self.get_data_path("hmms/2_rna.hmm"), "r" - ) - fmt.validate() - - def test_mixed_hmm_profiles_invalid_1(self): - fmt = AminoHmmMultipleProfilesFileFmt( - self.get_data_path("hmms/amino_dna.hmm"), 'r' - ) - with self.assertRaisesRegex( - ValidationError, "Found profiles with different alphabets." 
- ): - fmt.validate() - - def test_mixed_hmm_profiles_invalid_2(self): - fmt = DnaHmmMultipleProfilesFileFmt( - self.get_data_path("hmms/rna_dna.hmm"), 'r' - ) - with self.assertRaisesRegex( - ValidationError, "Found profiles with different alphabets." - ): - fmt.validate() diff --git a/q2_types/profile_hmms/__init__.py b/q2_types/profile_hmms/__init__.py new file mode 100644 index 00000000..cfdd5c6c --- /dev/null +++ b/q2_types/profile_hmms/__init__.py @@ -0,0 +1,42 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- +from ._format import ( + ProteinHmmMultipleProfilesFileFmt, + DnaHmmMultipleProfilesFileFmt, + RnaHmmMultipleProfilesFileFmt, + ProteinHmmMultipleProfilesDirectoryFormat, + DnaHmmMultipleProfilesDirectoryFormat, + RnaHmmMultipleProfilesDirectoryFormat, + ProteinHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt, + ProteinHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat, + BaseHmmPressedDirFmt +) +from ._type import ( + ProfileHMM, + SingleProtein, SingleDNA, SingleRNA, + MultipleProtein, MultipleDNA, MultipleRNA, + PressedRNA, PressedDNA, PressedProtein +) + +__all__ = [ + "ProteinHmmMultipleProfilesFileFmt", + "DnaHmmMultipleProfilesFileFmt", + "RnaHmmMultipleProfilesFileFmt", + "ProteinHmmMultipleProfilesDirectoryFormat", + "DnaHmmMultipleProfilesDirectoryFormat", + "RnaHmmMultipleProfilesDirectoryFormat", + "ProteinHmmFileFmt", "DnaHmmFileFmt", "RnaHmmFileFmt", + "ProteinHmmDirectoryFormat", + "DnaHmmDirectoryFormat", + "RnaHmmDirectoryFormat", + "BaseHmmPressedDirFmt", + "ProfileHMM", + "SingleProtein", "SingleDNA", "SingleRNA", + "MultipleProtein", "MultipleDNA", "MultipleRNA", + "PressedRNA", "PressedDNA", "PressedProtein" +] diff --git 
a/q2_types/hmmer/_format.py b/q2_types/profile_hmms/_format.py similarity index 67% rename from q2_types/hmmer/_format.py rename to q2_types/profile_hmms/_format.py index 466c2495..b03ce069 100644 --- a/q2_types/hmmer/_format.py +++ b/q2_types/profile_hmms/_format.py @@ -5,7 +5,6 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -import re from pyhmmer.plan7 import HMMFile from qiime2.plugin import model from qiime2.core.exceptions import ValidationError @@ -17,36 +16,6 @@ def _validate_(self, level): pass -class HmmIdmapFileFmt(model.TextFileFormat): - def _validate_(self, level): - with open(str(self), 'r') as file: - # Set the number of rows to be parsed - max_lines = {"min": 100, "max": 10000000}[level] - lines = file.readlines() - for i, line in enumerate(lines, 1): - # Check number of lines parsed so far - if i > max_lines: - break - - # Validate line - if not re.match(r'^(\d+) ([A-Z0-9]+)$', line): - raise ValidationError( - f"Invalid line {i}.\n" - f"{line} \n" - "Expected index and an alphanumeric code separated " - "by a single space." 
- ) - - # Check index is equal to line number - idx, code = line.rstrip("\n").split(sep=" ") - if not idx == str(i): - raise ValidationError( - f"Invalid line {i}.\n" - f"{line} \n" - f"Expected index {i} but got {idx} instead.\n" - ) - - class BaseHmmPressedDirFmt(model.DirectoryFormat): """ The .h3m file contains the profile HMMs @@ -60,9 +29,6 @@ class BaseHmmPressedDirFmt(model.DirectoryFormat): h3i = model.File(r'.*\.hmm\.h3i', format=HmmBinaryFileFmt) h3f = model.File(r'.*\.hmm\.h3f', format=HmmBinaryFileFmt) h3p = model.File(r'.*\.hmm\.h3p', format=HmmBinaryFileFmt) - idmap = model.File( - r'.*\.hmm\.idmap', format=HmmIdmapFileFmt, optional=True - ) class HmmBaseFileFmt(model.TextFileFormat): @@ -102,7 +68,7 @@ def _validate_file_fmt( ) -class AminoHmmFileFmt(HmmBaseFileFmt): +class ProteinHmmFileFmt(HmmBaseFileFmt): alphabet = "amino" def _validate_(self, level): @@ -123,17 +89,17 @@ def _validate_(self, level): self._validate_file_fmt(level, self.alphabet, True) -AminoHmmDirectoryFormat = model.SingleFileDirectoryFormat( - 'AminoHmmFileFmt', 'profile.hmm', AminoHmmFileFmt) +ProteinHmmDirectoryFormat = model.SingleFileDirectoryFormat( + 'AminoHmmFileFmt', r'.*\.hmm', ProteinHmmFileFmt) DnaHmmDirectoryFormat = model.SingleFileDirectoryFormat( - 'DnaHmmFileFmt', 'profile.hmm', DnaHmmFileFmt) + 'DnaHmmFileFmt', r'.*\.hmm', DnaHmmFileFmt) RnaHmmDirectoryFormat = model.SingleFileDirectoryFormat( - 'RnaHmmFileFmt', 'profile.hmm', RnaHmmFileFmt) + 'RnaHmmFileFmt', r'.*\.hmm', RnaHmmFileFmt) -class AminoHmmMultipleProfilesFileFmt(AminoHmmFileFmt): +class ProteinHmmMultipleProfilesFileFmt(ProteinHmmFileFmt): def _validate_(self, level): self._validate_file_fmt(level, self.alphabet, False) @@ -148,28 +114,30 @@ def _validate_(self, level): self._validate_file_fmt(level, self.alphabet, False) -AminoHmmMultipleProfilesDirectoryFormat = model.SingleFileDirectoryFormat( +ProteinHmmMultipleProfilesDirectoryFormat = model.SingleFileDirectoryFormat(
'AminoHmmMultipleProfilesDirectoryFormat', - 'profile.hmm', - AminoHmmMultipleProfilesFileFmt + r'.*\.hmm', + ProteinHmmMultipleProfilesFileFmt ) DnaHmmMultipleProfilesDirectoryFormat = model.SingleFileDirectoryFormat( 'DnaHmmMultipleProfilesDirectoryFormat', - 'profile.hmm', + r'.*\.hmm', DnaHmmMultipleProfilesFileFmt, ) RnaHmmMultipleProfilesDirectoryFormat = model.SingleFileDirectoryFormat( 'RnaHmmMultipleProfilesDirectoryFormat', - 'profile.hmm', + r'.*\.hmm', RnaHmmMultipleProfilesFileFmt, ) plugin.register_formats( - AminoHmmMultipleProfilesFileFmt, DnaHmmMultipleProfilesFileFmt, - RnaHmmMultipleProfilesFileFmt, AminoHmmMultipleProfilesDirectoryFormat, + ProteinHmmMultipleProfilesFileFmt, + DnaHmmMultipleProfilesFileFmt, + RnaHmmMultipleProfilesFileFmt, + ProteinHmmMultipleProfilesDirectoryFormat, DnaHmmMultipleProfilesDirectoryFormat, RnaHmmMultipleProfilesDirectoryFormat, - AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat + ProteinHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat ) diff --git a/q2_types/profile_hmms/_type.py b/q2_types/profile_hmms/_type.py new file mode 100644 index 00000000..86233016 --- /dev/null +++ b/q2_types/profile_hmms/_type.py @@ -0,0 +1,134 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software.
+# ---------------------------------------------------------------------------- +from qiime2.plugin import SemanticType +from q2_types.plugin_setup import plugin +from q2_types.profile_hmms._format import ( + ProteinHmmMultipleProfilesDirectoryFormat, + DnaHmmMultipleProfilesDirectoryFormat, + RnaHmmMultipleProfilesDirectoryFormat, + ProteinHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat, + BaseHmmPressedDirFmt +) + + +ProfileHMM = SemanticType('ProfileHMM', field_names='type') +SingleProtein = SemanticType( + 'SingleProtein', variant_of=ProfileHMM.field['type'] +) +SingleDNA = SemanticType( + 'SingleDNA', variant_of=ProfileHMM.field['type'] +) +SingleRNA = SemanticType( + 'SingleRNA', variant_of=ProfileHMM.field['type'] +) +MultipleProtein = SemanticType( + 'MultipleProtein', variant_of=ProfileHMM.field['type'] +) +MultipleDNA = SemanticType( + 'MultipleDNA', variant_of=ProfileHMM.field['type'] +) +MultipleRNA = SemanticType( + 'MultipleRNA', variant_of=ProfileHMM.field['type'] +) +PressedProtein = SemanticType( + 'PressedProtein', variant_of=ProfileHMM.field['type'] +) +PressedDNA = SemanticType( + 'PressedDNA', variant_of=ProfileHMM.field['type'] +) +PressedRNA = SemanticType( + 'PressedRNA', variant_of=ProfileHMM.field['type'] +) + +plugin.register_semantic_types( + ProfileHMM, + SingleProtein, SingleDNA, SingleRNA, + MultipleProtein, MultipleDNA, MultipleRNA, + PressedProtein, PressedDNA, PressedRNA +) + +plugin.register_artifact_class( + ProfileHMM[PressedProtein], + directory_format=BaseHmmPressedDirFmt, + description=( + "A collection of profile Hidden Markov Models for amino acid " + "sequences in binary format and indexed." + ) +) + +plugin.register_artifact_class( + ProfileHMM[PressedDNA], + directory_format=BaseHmmPressedDirFmt, + description=( + "A collection of profile Hidden Markov Models for DNA " + "sequences in binary format and indexed." 
+ ) +) + +plugin.register_artifact_class( + ProfileHMM[PressedRNA], + directory_format=BaseHmmPressedDirFmt, + description=( + "A collection of profile Hidden Markov Models for RNA " + "sequences in binary format and indexed." + ) +) + +plugin.register_artifact_class( + ProfileHMM[SingleProtein], + directory_format=ProteinHmmDirectoryFormat, + description=( + "One single profile Hidden Markov Model representing a group " + "of related proteins." + ) +) + +plugin.register_artifact_class( + ProfileHMM[SingleDNA], + directory_format=DnaHmmDirectoryFormat, + description=( + "One single profile Hidden Markov Model representing a group " + "of related DNA sequences." + ) +) + +plugin.register_artifact_class( + ProfileHMM[SingleRNA], + directory_format=RnaHmmDirectoryFormat, + description=( + "One single profile Hidden Markov Model representing a group " + "of related RNA sequences." + ) +) + +plugin.register_artifact_class( + ProfileHMM[MultipleProtein], + directory_format=ProteinHmmMultipleProfilesDirectoryFormat, + description=( + "A collection of profile Hidden Markov Models, " + "each representing a group of related proteins." + ) +) + +plugin.register_artifact_class( + ProfileHMM[MultipleDNA], + directory_format=DnaHmmMultipleProfilesDirectoryFormat, + description=( + "A collection of profile Hidden Markov Models, " + "each representing a group of related DNA sequences." + ) +) + +plugin.register_artifact_class( + ProfileHMM[MultipleRNA], + directory_format=RnaHmmMultipleProfilesDirectoryFormat, + description=( + "A collection of profile Hidden Markov Models, " + "each representing a group of related RNA sequences." 
+ ) +) diff --git a/q2_types/hmmer/tests/__init__.py b/q2_types/profile_hmms/tests/__init__.py similarity index 100% rename from q2_types/hmmer/tests/__init__.py rename to q2_types/profile_hmms/tests/__init__.py diff --git a/q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3f b/q2_types/profile_hmms/tests/data/bacteria/bacteria.hmm.h3f similarity index 100% rename from q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3f rename to q2_types/profile_hmms/tests/data/bacteria/bacteria.hmm.h3f diff --git a/q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3i b/q2_types/profile_hmms/tests/data/bacteria/bacteria.hmm.h3i similarity index 100% rename from q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3i rename to q2_types/profile_hmms/tests/data/bacteria/bacteria.hmm.h3i diff --git a/q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3m b/q2_types/profile_hmms/tests/data/bacteria/bacteria.hmm.h3m similarity index 100% rename from q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3m rename to q2_types/profile_hmms/tests/data/bacteria/bacteria.hmm.h3m diff --git a/q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3p b/q2_types/profile_hmms/tests/data/bacteria/bacteria.hmm.h3p similarity index 100% rename from q2_types/hmmer/tests/data/bacteria/bacteria.hmm.h3p rename to q2_types/profile_hmms/tests/data/bacteria/bacteria.hmm.h3p diff --git a/q2_types/hmmer/tests/data/hmms/2_dna.hmm b/q2_types/profile_hmms/tests/data/hmms/2_dna.hmm similarity index 100% rename from q2_types/hmmer/tests/data/hmms/2_dna.hmm rename to q2_types/profile_hmms/tests/data/hmms/2_dna.hmm diff --git a/q2_types/hmmer/tests/data/hmms/2_rna.hmm b/q2_types/profile_hmms/tests/data/hmms/2_rna.hmm similarity index 100% rename from q2_types/hmmer/tests/data/hmms/2_rna.hmm rename to q2_types/profile_hmms/tests/data/hmms/2_rna.hmm diff --git a/q2_types/hmmer/tests/data/hmms/4_amino.hmm b/q2_types/profile_hmms/tests/data/hmms/4_amino.hmm similarity index 100% rename from q2_types/hmmer/tests/data/hmms/4_amino.hmm 
rename to q2_types/profile_hmms/tests/data/hmms/4_amino.hmm diff --git a/q2_types/hmmer/tests/data/hmms/amino.hmm b/q2_types/profile_hmms/tests/data/hmms/amino.hmm similarity index 100% rename from q2_types/hmmer/tests/data/hmms/amino.hmm rename to q2_types/profile_hmms/tests/data/hmms/amino.hmm diff --git a/q2_types/hmmer/tests/data/hmms/amino_dna.hmm b/q2_types/profile_hmms/tests/data/hmms/amino_dna.hmm similarity index 100% rename from q2_types/hmmer/tests/data/hmms/amino_dna.hmm rename to q2_types/profile_hmms/tests/data/hmms/amino_dna.hmm diff --git a/q2_types/hmmer/tests/data/hmms/dna.hmm b/q2_types/profile_hmms/tests/data/hmms/dna.hmm similarity index 100% rename from q2_types/hmmer/tests/data/hmms/dna.hmm rename to q2_types/profile_hmms/tests/data/hmms/dna.hmm diff --git a/q2_types/hmmer/tests/data/hmms/rna.hmm b/q2_types/profile_hmms/tests/data/hmms/rna.hmm similarity index 100% rename from q2_types/hmmer/tests/data/hmms/rna.hmm rename to q2_types/profile_hmms/tests/data/hmms/rna.hmm diff --git a/q2_types/hmmer/tests/data/hmms/rna_dna.hmm b/q2_types/profile_hmms/tests/data/hmms/rna_dna.hmm similarity index 100% rename from q2_types/hmmer/tests/data/hmms/rna_dna.hmm rename to q2_types/profile_hmms/tests/data/hmms/rna_dna.hmm diff --git a/q2_types/profile_hmms/tests/test_format.py b/q2_types/profile_hmms/tests/test_format.py new file mode 100644 index 00000000..e7cdd320 --- /dev/null +++ b/q2_types/profile_hmms/tests/test_format.py @@ -0,0 +1,103 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. 
+# ---------------------------------------------------------------------------- +from qiime2.plugin.testing import TestPluginBase +from q2_types.profile_hmms._format import ( + BaseHmmPressedDirFmt, + ProteinHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt, + ProteinHmmMultipleProfilesFileFmt, + DnaHmmMultipleProfilesFileFmt, + RnaHmmMultipleProfilesFileFmt +) +from qiime2.plugin import ValidationError + + +class TestHmmFormats(TestPluginBase): + package = 'q2_types.hmmer.tests' + + def test_BaseHmmPressedDirFmt_valid(self): + fmt = BaseHmmPressedDirFmt(self.get_data_path("bacteria"), 'r') + fmt.validate(level="min") + + def test_AminoHmmFileFmt_valid(self): + fmt = ProteinHmmFileFmt(self.get_data_path("hmms/amino.hmm"), "r") + fmt.validate() + + def test_DnaHmmFileFmt_valid(self): + fmt = DnaHmmFileFmt(self.get_data_path("hmms/dna.hmm"), "r") + fmt.validate() + + def test_RnaHmmFileFmt_valid(self): + fmt = RnaHmmFileFmt(self.get_data_path("hmms/rna.hmm"), "r") + fmt.validate() + + def test_AminoHmmFileFmt_invalid_alph(self): + for typ in ["rna", "dna"]: + fmt = ProteinHmmFileFmt(self.get_data_path(f"hmms/{typ}.hmm"), "r") + with self.assertRaisesRegex( + ValidationError, "Found profile with alphabet " + ): + fmt.validate() + + def test_DnaHmmFileFmt_invalid_alph(self): + for typ in ["rna", "amino"]: + fmt = DnaHmmFileFmt(self.get_data_path(f"hmms/{typ}.hmm"), "r") + with self.assertRaisesRegex( + ValidationError, "Found profile with alphabet " + ): + fmt.validate() + + def test_RnaHmmFileFmt_invalid_alph(self): + for typ in ["dna", "amino"]: + fmt = RnaHmmFileFmt(self.get_data_path(f"hmms/{typ}.hmm"), "r") + with self.assertRaisesRegex( + ValidationError, "Found profile with alphabet " + ): + fmt.validate() + + def test_AminoHmmFileFmt_too_many_profiles(self): + fmt = ProteinHmmFileFmt(self.get_data_path("hmms/4_amino.hmm"), "r") + with self.assertRaisesRegex( + ValidationError, "Expected 1 profile, found 4." 
+ ): + fmt.validate() + + def test_AminoHmmMultipleProfilesFileFmt_valid(self): + fmt = ProteinHmmMultipleProfilesFileFmt( + self.get_data_path("hmms/4_amino.hmm"), 'r' + ) + fmt.validate() + + def test_DnaHmmMultipleProfilesFileFmt_valid(self): + fmt = DnaHmmMultipleProfilesFileFmt( + self.get_data_path("hmms/2_dna.hmm"), "r" + ) + fmt.validate() + + def test_RnaHmmMultipleProfilesFileFmt_valid(self): + fmt = RnaHmmMultipleProfilesFileFmt( + self.get_data_path("hmms/2_rna.hmm"), "r" + ) + fmt.validate() + + def test_mixed_hmm_profiles_invalid_1(self): + fmt = ProteinHmmMultipleProfilesFileFmt( + self.get_data_path("hmms/amino_dna.hmm"), 'r' + ) + with self.assertRaisesRegex( + ValidationError, "Found profiles with different alphabets." + ): + fmt.validate() + + def test_mixed_hmm_profiles_invalid_2(self): + fmt = DnaHmmMultipleProfilesFileFmt( + self.get_data_path("hmms/rna_dna.hmm"), 'r' + ) + with self.assertRaisesRegex( + ValidationError, "Found profiles with different alphabets." + ): + fmt.validate() diff --git a/q2_types/hmmer/tests/test_type.py b/q2_types/profile_hmms/tests/test_type.py similarity index 64% rename from q2_types/hmmer/tests/test_type.py rename to q2_types/profile_hmms/tests/test_type.py index 734aa00c..ed040495 100644 --- a/q2_types/hmmer/tests/test_type.py +++ b/q2_types/profile_hmms/tests/test_type.py @@ -6,15 +6,15 @@ # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- from qiime2.plugin.testing import TestPluginBase -from q2_types.hmmer import ( - HMM, BaseHmmPressedDirFmt, - AminoHmmMultipleProfilesDirectoryFormat, +from q2_types.profile_hmms._type import ( + ProfileHMM, BaseHmmPressedDirFmt, + ProteinHmmMultipleProfilesDirectoryFormat, DnaHmmMultipleProfilesDirectoryFormat, RnaHmmMultipleProfilesDirectoryFormat, - AminoHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat, - SingleAmino, SingleDNA, SingleRNA, - MultipleAmino, MultipleDNA, MultipleRNA, - MultipleAminoPressed, MultipleDNAPressed, MultipleRNAPressed + ProteinHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat, + SingleProtein, SingleDNA, SingleRNA, + MultipleProtein, MultipleDNA, MultipleRNA, + PressedProtein, PressedDNA, PressedRNA ) @@ -22,49 +22,50 @@ class TestHMMType(TestPluginBase): package = 'q2_types.reference_db.tests' def test_hmmer_registration(self): - self.assertRegisteredSemanticType(HMM) + self.assertRegisteredSemanticType(ProfileHMM) def test_SingleAmino_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - HMM[SingleAmino], AminoHmmDirectoryFormat + ProfileHMM[SingleProtein], ProteinHmmDirectoryFormat ) def test_SingleDNA_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - HMM[SingleDNA], DnaHmmDirectoryFormat + ProfileHMM[SingleDNA], DnaHmmDirectoryFormat ) def test_SingleRNA_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - HMM[SingleRNA], RnaHmmDirectoryFormat + ProfileHMM[SingleRNA], RnaHmmDirectoryFormat ) def test_MultipleAmino_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - HMM[MultipleAmino], AminoHmmMultipleProfilesDirectoryFormat + ProfileHMM[MultipleProtein], + ProteinHmmMultipleProfilesDirectoryFormat ) def test_MultipleDNA_semantic_type_registered_to_DirFmt(self): 
self.assertSemanticTypeRegisteredToFormat( - HMM[MultipleDNA], DnaHmmMultipleProfilesDirectoryFormat + ProfileHMM[MultipleDNA], DnaHmmMultipleProfilesDirectoryFormat ) def test_MultipleRNA_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - HMM[MultipleRNA], RnaHmmMultipleProfilesDirectoryFormat + ProfileHMM[MultipleRNA], RnaHmmMultipleProfilesDirectoryFormat ) def test_MultipleAminoPressed_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - HMM[MultipleAminoPressed], BaseHmmPressedDirFmt + ProfileHMM[PressedProtein], BaseHmmPressedDirFmt ) def test_MultipleDNAPressed_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - HMM[MultipleDNAPressed], BaseHmmPressedDirFmt + ProfileHMM[PressedDNA], BaseHmmPressedDirFmt ) def test_MultipleRNAPressed_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - HMM[MultipleRNAPressed], BaseHmmPressedDirFmt + ProfileHMM[PressedRNA], BaseHmmPressedDirFmt ) diff --git a/q2_types/reference_db/tests/test_format.py b/q2_types/reference_db/tests/test_format.py index 105b27da..41218089 100644 --- a/q2_types/reference_db/tests/test_format.py +++ b/q2_types/reference_db/tests/test_format.py @@ -7,11 +7,11 @@ # ---------------------------------------------------------------------------- from qiime2.plugin.testing import TestPluginBase from q2_types.reference_db._format import ( - DiamondDatabaseFileFmt, DiamondDatabaseDirFmt, EggnogRefBinFileFmt, - EggnogRefDirFmt, NCBITaxonomyNamesFormat, NCBITaxonomyNodesFormat, - NCBITaxonomyDirFmt, NCBITaxonomyBinaryFileFmt, - EggnogProteinSequencesDirFmt, EggnogRefTextFileFmt - ) + DiamondDatabaseFileFmt, DiamondDatabaseDirFmt, EggnogRefBinFileFmt, + EggnogRefDirFmt, NCBITaxonomyNamesFormat, NCBITaxonomyNodesFormat, + NCBITaxonomyDirFmt, NCBITaxonomyBinaryFileFmt, + EggnogProteinSequencesDirFmt, EggnogRefTextFileFmt +) from qiime2.plugin import ValidationError From 
c6e61afc2809196526b34e38892962991e9ffe9e Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Mon, 10 Jun 2024 14:58:37 +0200 Subject: [PATCH 20/28] update package in tests --- q2_types/profile_hmms/tests/test_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q2_types/profile_hmms/tests/test_format.py b/q2_types/profile_hmms/tests/test_format.py index e7cdd320..d556c53e 100644 --- a/q2_types/profile_hmms/tests/test_format.py +++ b/q2_types/profile_hmms/tests/test_format.py @@ -17,7 +17,7 @@ class TestHmmFormats(TestPluginBase): - package = 'q2_types.hmmer.tests' + package = 'q2_types.profile_hmms.tests' def test_BaseHmmPressedDirFmt_valid(self): fmt = BaseHmmPressedDirFmt(self.get_data_path("bacteria"), 'r') From 9ae98e19e981a4ae94e961287424f423a1838ff6 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Mon, 10 Jun 2024 17:12:40 +0200 Subject: [PATCH 21/28] change to name an classes --- q2_types/profile_hmms/__init__.py | 36 +++---- q2_types/profile_hmms/_format.py | 112 ++++++++++----------- q2_types/profile_hmms/_type.py | 30 +++--- q2_types/profile_hmms/tests/test_format.py | 78 ++++++++------ q2_types/profile_hmms/tests/test_type.py | 34 ++++--- 5 files changed, 151 insertions(+), 139 deletions(-) diff --git a/q2_types/profile_hmms/__init__.py b/q2_types/profile_hmms/__init__.py index cfdd5c6c..0ce79c71 100644 --- a/q2_types/profile_hmms/__init__.py +++ b/q2_types/profile_hmms/__init__.py @@ -6,15 +6,13 @@ # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- from ._format import ( - ProteinHmmMultipleProfilesFileFmt, - DnaHmmMultipleProfilesFileFmt, - RnaHmmMultipleProfilesFileFmt, - ProteinHmmMultipleProfilesDirectoryFormat, - DnaHmmMultipleProfilesDirectoryFormat, - RnaHmmMultipleProfilesDirectoryFormat, - ProteinHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt, - ProteinHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat, - BaseHmmPressedDirFmt + PressedProfileHmmsDirectoryFmt, + ProteinSingleProfileHmmDirectoryFmt, + ProteinMultipleProfileHmmDirectoryFmt, + DnaSingleProfileHmmDirectoryFmt, + DnaMultipleProfileHmmDirectoryFmt, + RnaSingleProfileHmmDirectoryFmt, + RnaMultipleProfileHmmDirectoryFmt ) from ._type import ( ProfileHMM, @@ -24,17 +22,15 @@ ) __all__ = [ - "ProteinHmmMultipleProfilesFileFmt", - "DnaHmmMultipleProfilesFileFmt", - "RnaHmmMultipleProfilesFileFmt", - "ProteinHmmMultipleProfilesDirectoryFormat", - "DnaHmmMultipleProfilesDirectoryFormat", - "RnaHmmMultipleProfilesDirectoryFormat", - "ProteinHmmFileFmt", "DnaHmmFileFmt", "RnaHmmFileFmt", - "ProteinHmmDirectoryFormat", - "DnaHmmDirectoryFormat", - "RnaHmmDirectoryFormat", - "BaseHmmPressedDirFmt", + "ProfileHmmBinaryFileFmt", + "PressedProfileHmmsDirectoryFmt", + "ProfileHmmFileFmt", + "ProteinSingleProfileHmmDirectoryFmt", + "ProteinMultipleProfileHmmDirectoryFmt", + "DnaSingleProfileHmmDirectoryFmt", + "DnaMultipleProfileHmmDirectoryFmt", + "RnaSingleProfileHmmDirectoryFmt", + "RnaMultipleProfileHmmDirectoryFmt", "ProfileHMM", "SingleProtein", "SingleDNA", "SingleRNA", "MultipleProtein", "MultipleDNA", "MultipleRNA", diff --git a/q2_types/profile_hmms/_format.py b/q2_types/profile_hmms/_format.py index b03ce069..d10046f8 100644 --- a/q2_types/profile_hmms/_format.py +++ b/q2_types/profile_hmms/_format.py @@ -11,12 +11,12 @@ from q2_types.plugin_setup import plugin -class HmmBinaryFileFmt(model.BinaryFileFormat): +class 
ProfileHmmBinaryFileFmt(model.BinaryFileFormat): def _validate_(self, level): pass -class BaseHmmPressedDirFmt(model.DirectoryFormat): +class PressedProfileHmmsDirectoryFmt(model.DirectoryFormat): """ The .h3m file contains the profile HMMs and their annotation in a binary format. The .h3i file is an @@ -25,16 +25,14 @@ class BaseHmmPressedDirFmt(model.DirectoryFormat): (the MSV filter). The .h3p file contains precomputed data structures for the rest of each profile. """ - h3m = model.File(r'.*\.hmm\.h3m', format=HmmBinaryFileFmt) - h3i = model.File(r'.*\.hmm\.h3i', format=HmmBinaryFileFmt) - h3f = model.File(r'.*\.hmm\.h3f', format=HmmBinaryFileFmt) - h3p = model.File(r'.*\.hmm\.h3p', format=HmmBinaryFileFmt) + h3m = model.File(r'.*\.hmm\.h3m', format=ProfileHmmBinaryFileFmt) + h3i = model.File(r'.*\.hmm\.h3i', format=ProfileHmmBinaryFileFmt) + h3f = model.File(r'.*\.hmm\.h3f', format=ProfileHmmBinaryFileFmt) + h3p = model.File(r'.*\.hmm\.h3p', format=ProfileHmmBinaryFileFmt) -class HmmBaseFileFmt(model.TextFileFormat): - def _validate_file_fmt( - self, level: str, alphabet: str, single_profile: bool - ): +class ProfileHmmFileFmt(model.TextFileFormat): + def _validate_(self, level: str): """ Check http://eddylab.org/software/hmmer/Userguide.pdf section "HMMER profile HMM files" for full description of @@ -52,7 +50,7 @@ def _validate_file_fmt( f"{e}" ) - if len(hmm_profiles) > 1 and single_profile: + if len(hmm_profiles) > 1 and self.single_profile: raise ValidationError( f"Expected 1 profile, found {len(hmm_profiles)}." ) @@ -60,84 +58,80 @@ def _validate_file_fmt( for hmm_profile in hmm_profiles[:parse_n_profiles]: hmm_profile.validate(tolerance=tolerance) - if hmm_profile.alphabet.type.lower() != alphabet: + if hmm_profile.alphabet.type.lower() != self.alphabet: raise ValidationError( "Found profile with alphabet " f"{hmm_profile.alphabet.type.lower()}\n" - f"{self.__class__} only accepts {alphabet} profiles." + f"Expected alphabet: {self.alphabet}." 
) -class ProteinHmmFileFmt(HmmBaseFileFmt): +class ProteinProfileHmmFileFmt(ProfileHmmFileFmt): alphabet = "amino" - def _validate_(self, level): - self._validate_file_fmt(level, self.alphabet, True) +class ProteinSingleProfileHmmFileFmt(ProteinProfileHmmFileFmt): + single_profile = True + + +class ProteinMultipleProfileHmmFileFmt(ProteinProfileHmmFileFmt): + single_profile = False -class DnaHmmFileFmt(HmmBaseFileFmt): + +class DnaProfileHmmFileFmt(ProfileHmmFileFmt): alphabet = "dna" - def _validate_(self, level): - self._validate_file_fmt(level, self.alphabet, True) +class DnaSingleProfileHmmFileFmt(DnaProfileHmmFileFmt): + single_profile = True -class RnaHmmFileFmt(HmmBaseFileFmt): + +class DnaMultipleProfileHmmFileFmt(DnaProfileHmmFileFmt): + single_profile = False + + +class RnaProfileHmmFileFmt(ProfileHmmFileFmt): alphabet = "rna" - def _validate_(self, level): - self._validate_file_fmt(level, self.alphabet, True) +class RnaSingleProfileHmmFileFmt(RnaProfileHmmFileFmt): + single_profile = True -ProteinHmmDirectoryFormat = model.SingleFileDirectoryFormat( - 'AminoHmmFileFmt', r'.*\..hmm', ProteinHmmFileFmt) -DnaHmmDirectoryFormat = model.SingleFileDirectoryFormat( - 'DnaHmmFileFmt', r'.*\..hmm', DnaHmmFileFmt) +class RnaMultipleProfileHmmFileFmt(RnaProfileHmmFileFmt): + single_profile = False -RnaHmmDirectoryFormat = model.SingleFileDirectoryFormat( - 'RnaHmmFileFmt', r'.*\..hmm', RnaHmmFileFmt) +class ProteinSingleProfileHmmDirectoryFmt(model.DirectoryFormat): + profile = model.File(r'.*\.hmm', format=ProteinSingleProfileHmmFileFmt) -class ProteinHmmMultipleProfilesFileFmt(ProteinHmmFileFmt): - def _validate_(self, level): - self._validate_file_fmt(level, self.alphabet, False) +class ProteinMultipleProfileHmmDirectoryFmt(model.DirectoryFormat): + profiles = model.File(r'.*\.hmm', format=ProteinMultipleProfileHmmFileFmt) -class DnaHmmMultipleProfilesFileFmt(DnaHmmFileFmt): - def _validate_(self, level): - self._validate_file_fmt(level, self.alphabet, False) 
+class DnaSingleProfileHmmDirectoryFmt(model.DirectoryFormat): + profile = model.File(r'.*\.hmm', format=DnaSingleProfileHmmFileFmt) -class RnaHmmMultipleProfilesFileFmt(RnaHmmFileFmt): - def _validate_(self, level): - self._validate_file_fmt(level, self.alphabet, False) +class DnaMultipleProfileHmmDirectoryFmt(model.DirectoryFormat): + profiles = model.File(r'.*\.hmm', format=DnaMultipleProfileHmmFileFmt) -ProteinHmmMultipleProfilesDirectoryFormat = model.SingleFileDirectoryFormat( - 'AminoHmmMultipleProfilesDirectoryFormat', - r'.*\..hmm', - ProteinHmmMultipleProfilesFileFmt -) -DnaHmmMultipleProfilesDirectoryFormat = model.SingleFileDirectoryFormat( - 'DnaHmmMultipleProfilesDirectoryFormat', - r'.*\..hmm', - DnaHmmMultipleProfilesFileFmt, -) +class RnaSingleProfileHmmDirectoryFmt(model.DirectoryFormat): + profile = model.File(r'.*\.hmm', format=RnaSingleProfileHmmFileFmt) + + +class RnaMultipleProfileHmmDirectoryFmt(model.DirectoryFormat): + profiles = model.File(r'.*\.hmm', format=RnaMultipleProfileHmmFileFmt) -RnaHmmMultipleProfilesDirectoryFormat = model.SingleFileDirectoryFormat( - 'RnaHmmMultipleProfilesDirectoryFormat', - r'.*\..hmm', - RnaHmmMultipleProfilesFileFmt, -) plugin.register_formats( - ProteinHmmMultipleProfilesFileFmt, - DnaHmmMultipleProfilesFileFmt, - RnaHmmMultipleProfilesFileFmt, - ProteinHmmMultipleProfilesDirectoryFormat, - DnaHmmMultipleProfilesDirectoryFormat, - RnaHmmMultipleProfilesDirectoryFormat, - ProteinHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat + PressedProfileHmmsDirectoryFmt, + ProteinSingleProfileHmmDirectoryFmt, + ProteinMultipleProfileHmmDirectoryFmt, + DnaSingleProfileHmmDirectoryFmt, + DnaMultipleProfileHmmDirectoryFmt, + RnaSingleProfileHmmDirectoryFmt, + RnaMultipleProfileHmmDirectoryFmt ) diff --git a/q2_types/profile_hmms/_type.py b/q2_types/profile_hmms/_type.py index 86233016..f34e5e7b 100644 --- a/q2_types/profile_hmms/_type.py +++ b/q2_types/profile_hmms/_type.py @@ -8,11 +8,13 @@ from 
qiime2.plugin import SemanticType from q2_types.plugin_setup import plugin from q2_types.profile_hmms._format import ( - ProteinHmmMultipleProfilesDirectoryFormat, - DnaHmmMultipleProfilesDirectoryFormat, - RnaHmmMultipleProfilesDirectoryFormat, - ProteinHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat, - BaseHmmPressedDirFmt + PressedProfileHmmsDirectoryFmt, + DnaSingleProfileHmmDirectoryFmt, + DnaMultipleProfileHmmDirectoryFmt, + RnaSingleProfileHmmDirectoryFmt, + RnaMultipleProfileHmmDirectoryFmt, + ProteinSingleProfileHmmDirectoryFmt, + ProteinMultipleProfileHmmDirectoryFmt ) @@ -54,7 +56,7 @@ plugin.register_artifact_class( ProfileHMM[PressedProtein], - directory_format=BaseHmmPressedDirFmt, + directory_format=PressedProfileHmmsDirectoryFmt, description=( "A collection of profile Hidden Markov Models for amino acid " "sequences in binary format and indexed." @@ -63,7 +65,7 @@ plugin.register_artifact_class( ProfileHMM[PressedDNA], - directory_format=BaseHmmPressedDirFmt, + directory_format=PressedProfileHmmsDirectoryFmt, description=( "A collection of profile Hidden Markov Models for DNA " "sequences in binary format and indexed." @@ -72,7 +74,7 @@ plugin.register_artifact_class( ProfileHMM[PressedRNA], - directory_format=BaseHmmPressedDirFmt, + directory_format=PressedProfileHmmsDirectoryFmt, description=( "A collection of profile Hidden Markov Models for RNA " "sequences in binary format and indexed." @@ -81,7 +83,7 @@ plugin.register_artifact_class( ProfileHMM[SingleProtein], - directory_format=ProteinHmmDirectoryFormat, + directory_format=ProteinSingleProfileHmmDirectoryFmt, description=( "One single profile Hidden Markov Model representing a group " "of related proteins." @@ -90,7 +92,7 @@ plugin.register_artifact_class( ProfileHMM[SingleDNA], - directory_format=DnaHmmDirectoryFormat, + directory_format=DnaSingleProfileHmmDirectoryFmt, description=( "One single profile Hidden Markov Model representing a group " "of related DNA sequences." 
@@ -99,7 +101,7 @@ plugin.register_artifact_class( ProfileHMM[SingleRNA], - directory_format=RnaHmmDirectoryFormat, + directory_format=RnaSingleProfileHmmDirectoryFmt, description=( "One single profile Hidden Markov Model representing a group " "of related RNA sequences." @@ -108,7 +110,7 @@ plugin.register_artifact_class( ProfileHMM[MultipleProtein], - directory_format=ProteinHmmMultipleProfilesDirectoryFormat, + directory_format=ProteinMultipleProfileHmmDirectoryFmt, description=( "A collection of profile Hidden Markov Models, " "each representing a group of related proteins." @@ -117,7 +119,7 @@ plugin.register_artifact_class( ProfileHMM[MultipleDNA], - directory_format=DnaHmmMultipleProfilesDirectoryFormat, + directory_format=DnaMultipleProfileHmmDirectoryFmt, description=( "A collection of profile Hidden Markov Models, " "each representing a group of related DNA sequences." @@ -126,7 +128,7 @@ plugin.register_artifact_class( ProfileHMM[MultipleRNA], - directory_format=RnaHmmMultipleProfilesDirectoryFormat, + directory_format=RnaMultipleProfileHmmDirectoryFmt, description=( "A collection of profile Hidden Markov Models, " "each representing a group of related RNA sequences." 
diff --git a/q2_types/profile_hmms/tests/test_format.py b/q2_types/profile_hmms/tests/test_format.py index d556c53e..bed78da5 100644 --- a/q2_types/profile_hmms/tests/test_format.py +++ b/q2_types/profile_hmms/tests/test_format.py @@ -7,11 +7,13 @@ # ---------------------------------------------------------------------------- from qiime2.plugin.testing import TestPluginBase from q2_types.profile_hmms._format import ( - BaseHmmPressedDirFmt, - ProteinHmmFileFmt, DnaHmmFileFmt, RnaHmmFileFmt, - ProteinHmmMultipleProfilesFileFmt, - DnaHmmMultipleProfilesFileFmt, - RnaHmmMultipleProfilesFileFmt + PressedProfileHmmsDirectoryFmt, + ProteinMultipleProfileHmmFileFmt, + ProteinSingleProfileHmmFileFmt, + RnaMultipleProfileHmmFileFmt, + RnaSingleProfileHmmFileFmt, + DnaMultipleProfileHmmFileFmt, + DnaSingleProfileHmmFileFmt ) from qiime2.plugin import ValidationError @@ -19,73 +21,89 @@ class TestHmmFormats(TestPluginBase): package = 'q2_types.profile_hmms.tests' - def test_BaseHmmPressedDirFmt_valid(self): - fmt = BaseHmmPressedDirFmt(self.get_data_path("bacteria"), 'r') + def test_PressedProfileHmmsDirectoryFmt_valid(self): + fmt = PressedProfileHmmsDirectoryFmt( + self.get_data_path("bacteria"), 'r' + ) fmt.validate(level="min") - def test_AminoHmmFileFmt_valid(self): - fmt = ProteinHmmFileFmt(self.get_data_path("hmms/amino.hmm"), "r") + def test_ProteinSingleProfileHmmFileFmt_valid(self): + fmt = ProteinSingleProfileHmmFileFmt( + self.get_data_path("hmms/amino.hmm"), "r" + ) fmt.validate() - def test_DnaHmmFileFmt_valid(self): - fmt = DnaHmmFileFmt(self.get_data_path("hmms/dna.hmm"), "r") + def test_DnaSingleProfileHmmFileFmt_valid(self): + fmt = DnaSingleProfileHmmFileFmt( + self.get_data_path("hmms/dna.hmm"), "r", + ) fmt.validate() - def test_RnaHmmFileFmt_valid(self): - fmt = RnaHmmFileFmt(self.get_data_path("hmms/rna.hmm"), "r") + def test_RnaSingleProfileHmmFileFmt_valid(self): + fmt = RnaSingleProfileHmmFileFmt( + self.get_data_path("hmms/rna.hmm"), "r" + ) 
fmt.validate() - def test_AminoHmmFileFmt_invalid_alph(self): + def test_ProteinSingleProfileHmmFileFmt_invalid_alph(self): for typ in ["rna", "dna"]: - fmt = ProteinHmmFileFmt(self.get_data_path(f"hmms/{typ}.hmm"), "r") + fmt = ProteinSingleProfileHmmFileFmt( + self.get_data_path(f"hmms/{typ}.hmm"), "r", + ) with self.assertRaisesRegex( ValidationError, "Found profile with alphabet " ): fmt.validate() - def test_DnaHmmFileFmt_invalid_alph(self): + def test_DnaSingleProfileHmmFileFmt_invalid_alph(self): for typ in ["rna", "amino"]: - fmt = DnaHmmFileFmt(self.get_data_path(f"hmms/{typ}.hmm"), "r") + fmt = DnaSingleProfileHmmFileFmt( + self.get_data_path(f"hmms/{typ}.hmm"), "r" + ) with self.assertRaisesRegex( ValidationError, "Found profile with alphabet " ): fmt.validate() - def test_RnaHmmFileFmt_invalid_alph(self): + def test_RnaSingleProfileHmmFileFmt_invalid_alph(self): for typ in ["dna", "amino"]: - fmt = RnaHmmFileFmt(self.get_data_path(f"hmms/{typ}.hmm"), "r") + fmt = RnaSingleProfileHmmFileFmt( + self.get_data_path(f"hmms/{typ}.hmm"), "r" + ) with self.assertRaisesRegex( ValidationError, "Found profile with alphabet " ): fmt.validate() - def test_AminoHmmFileFmt_too_many_profiles(self): - fmt = ProteinHmmFileFmt(self.get_data_path("hmms/4_amino.hmm"), "r") + def test_ProteinSingleProfileHmmFileFmt_too_many_profiles(self): + fmt = ProteinSingleProfileHmmFileFmt( + self.get_data_path("hmms/4_amino.hmm"), "r" + ) with self.assertRaisesRegex( ValidationError, "Expected 1 profile, found 4." 
): fmt.validate() - def test_AminoHmmMultipleProfilesFileFmt_valid(self): - fmt = ProteinHmmMultipleProfilesFileFmt( - self.get_data_path("hmms/4_amino.hmm"), 'r' + def test_ProteinMultipleProfileHmmFileFmt_valid(self): + fmt = ProteinMultipleProfileHmmFileFmt( + self.get_data_path("hmms/4_amino.hmm"), "r" ) fmt.validate() - def test_DnaHmmMultipleProfilesFileFmt_valid(self): - fmt = DnaHmmMultipleProfilesFileFmt( + def test_DnaMultipleProfileHmmFileFmt_valid(self): + fmt = DnaMultipleProfileHmmFileFmt( self.get_data_path("hmms/2_dna.hmm"), "r" ) fmt.validate() - def test_RnaHmmMultipleProfilesFileFmt_valid(self): - fmt = RnaHmmMultipleProfilesFileFmt( + def test_RnaMultipleProfileHmmFileFmt_valid(self): + fmt = RnaMultipleProfileHmmFileFmt( self.get_data_path("hmms/2_rna.hmm"), "r" ) fmt.validate() def test_mixed_hmm_profiles_invalid_1(self): - fmt = ProteinHmmMultipleProfilesFileFmt( + fmt = ProteinMultipleProfileHmmFileFmt( self.get_data_path("hmms/amino_dna.hmm"), 'r' ) with self.assertRaisesRegex( @@ -94,7 +112,7 @@ def test_mixed_hmm_profiles_invalid_1(self): fmt.validate() def test_mixed_hmm_profiles_invalid_2(self): - fmt = DnaHmmMultipleProfilesFileFmt( + fmt = DnaMultipleProfileHmmFileFmt( self.get_data_path("hmms/rna_dna.hmm"), 'r' ) with self.assertRaisesRegex( diff --git a/q2_types/profile_hmms/tests/test_type.py b/q2_types/profile_hmms/tests/test_type.py index ed040495..4f6640e7 100644 --- a/q2_types/profile_hmms/tests/test_type.py +++ b/q2_types/profile_hmms/tests/test_type.py @@ -7,14 +7,17 @@ # ---------------------------------------------------------------------------- from qiime2.plugin.testing import TestPluginBase from q2_types.profile_hmms._type import ( - ProfileHMM, BaseHmmPressedDirFmt, - ProteinHmmMultipleProfilesDirectoryFormat, - DnaHmmMultipleProfilesDirectoryFormat, - RnaHmmMultipleProfilesDirectoryFormat, - ProteinHmmDirectoryFormat, DnaHmmDirectoryFormat, RnaHmmDirectoryFormat, + ProfileHMM, SingleProtein, SingleDNA, SingleRNA, 
MultipleProtein, MultipleDNA, MultipleRNA, - PressedProtein, PressedDNA, PressedRNA + PressedProtein, PressedDNA, PressedRNA, + PressedProfileHmmsDirectoryFmt, + DnaSingleProfileHmmDirectoryFmt, + DnaMultipleProfileHmmDirectoryFmt, + RnaSingleProfileHmmDirectoryFmt, + RnaMultipleProfileHmmDirectoryFmt, + ProteinSingleProfileHmmDirectoryFmt, + ProteinMultipleProfileHmmDirectoryFmt ) @@ -26,46 +29,45 @@ def test_hmmer_registration(self): def test_SingleAmino_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - ProfileHMM[SingleProtein], ProteinHmmDirectoryFormat + ProfileHMM[SingleProtein], ProteinSingleProfileHmmDirectoryFmt ) def test_SingleDNA_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - ProfileHMM[SingleDNA], DnaHmmDirectoryFormat + ProfileHMM[SingleDNA], DnaSingleProfileHmmDirectoryFmt ) def test_SingleRNA_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - ProfileHMM[SingleRNA], RnaHmmDirectoryFormat + ProfileHMM[SingleRNA], RnaSingleProfileHmmDirectoryFmt ) def test_MultipleAmino_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - ProfileHMM[MultipleProtein], - ProteinHmmMultipleProfilesDirectoryFormat + ProfileHMM[MultipleProtein], ProteinMultipleProfileHmmDirectoryFmt ) def test_MultipleDNA_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - ProfileHMM[MultipleDNA], DnaHmmMultipleProfilesDirectoryFormat + ProfileHMM[MultipleDNA], DnaMultipleProfileHmmDirectoryFmt ) def test_MultipleRNA_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - ProfileHMM[MultipleRNA], RnaHmmMultipleProfilesDirectoryFormat + ProfileHMM[MultipleRNA], RnaMultipleProfileHmmDirectoryFmt ) def test_MultipleAminoPressed_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - ProfileHMM[PressedProtein], BaseHmmPressedDirFmt + 
ProfileHMM[PressedProtein], PressedProfileHmmsDirectoryFmt ) def test_MultipleDNAPressed_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - ProfileHMM[PressedDNA], BaseHmmPressedDirFmt + ProfileHMM[PressedDNA], PressedProfileHmmsDirectoryFmt ) def test_MultipleRNAPressed_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - ProfileHMM[PressedRNA], BaseHmmPressedDirFmt + ProfileHMM[PressedRNA], PressedProfileHmmsDirectoryFmt ) From 39887c8dab1ae06a75969dedffedc0c161cc2392 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Mon, 10 Jun 2024 17:17:02 +0200 Subject: [PATCH 22/28] add to the error messsage of multiple profiles --- q2_types/profile_hmms/_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q2_types/profile_hmms/_format.py b/q2_types/profile_hmms/_format.py index d10046f8..a5869f15 100644 --- a/q2_types/profile_hmms/_format.py +++ b/q2_types/profile_hmms/_format.py @@ -47,7 +47,7 @@ def _validate_(self, level: str): except TypeError as e: raise ValidationError( "Found profiles with different alphabets.\n" - f"{e}" + f"Printing pyhmmer error message: {e}" ) if len(hmm_profiles) > 1 and self.single_profile: From 6d127e5c24c267d61d3ecf6cab1e56dc73f89232 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Tue, 11 Jun 2024 16:01:30 +0200 Subject: [PATCH 23/28] include testing data in setup.py --- q2_types/reference_db/__init__.py | 2 +- setup.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/q2_types/reference_db/__init__.py b/q2_types/reference_db/__init__.py index be66ab92..04b3bf6b 100644 --- a/q2_types/reference_db/__init__.py +++ b/q2_types/reference_db/__init__.py @@ -21,7 +21,7 @@ DiamondDatabaseDirFmt, NCBITaxonomyDirFmt, EggnogProteinSequencesDirFmt - ) +) __all__ = ['ReferenceDB', 'Diamond', 'Eggnog', 'DiamondDatabaseFileFmt', 'DiamondDatabaseDirFmt', 'EggnogRefDirFmt', 'EggnogRefTextFileFmt', diff --git a/setup.py b/setup.py 
index a760b2d7..c02ab984 100644 --- a/setup.py +++ b/setup.py @@ -89,6 +89,8 @@ ['data/*', 'data/db-valid/*'], 'q2_types.reference_db.tests': ['data/*', 'data/*/*', 'data/*/*/*'], + 'q2_types.profile_hmm.tests': + ['data/*', 'data/*/*'] }, zip_safe=False, ) From 1e450c5e62e8b6d424f629657a2ee33103abcd82 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Tue, 11 Jun 2024 16:10:00 +0200 Subject: [PATCH 24/28] typo --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c02ab984..94be4dd0 100644 --- a/setup.py +++ b/setup.py @@ -89,7 +89,7 @@ ['data/*', 'data/db-valid/*'], 'q2_types.reference_db.tests': ['data/*', 'data/*/*', 'data/*/*/*'], - 'q2_types.profile_hmm.tests': + 'q2_types.profile_hmms.tests': ['data/*', 'data/*/*'] }, zip_safe=False, From efc32b200e1f376df41d293b5fd3bd30b0c45e50 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Thu, 20 Jun 2024 09:58:05 +0200 Subject: [PATCH 25/28] class varaible in **ProfileHmmFileFmt() from single to single_profile --- q2_types/profile_hmms/_format.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/q2_types/profile_hmms/_format.py b/q2_types/profile_hmms/_format.py index a5869f15..825a2b5c 100644 --- a/q2_types/profile_hmms/_format.py +++ b/q2_types/profile_hmms/_format.py @@ -50,7 +50,7 @@ def _validate_(self, level: str): f"Printing pyhmmer error message: {e}" ) - if len(hmm_profiles) > 1 and self.single_profile: + if len(hmm_profiles) > 1 and self.single: raise ValidationError( f"Expected 1 profile, found {len(hmm_profiles)}." 
) @@ -71,11 +71,11 @@ class ProteinProfileHmmFileFmt(ProfileHmmFileFmt): class ProteinSingleProfileHmmFileFmt(ProteinProfileHmmFileFmt): - single_profile = True + single = True class ProteinMultipleProfileHmmFileFmt(ProteinProfileHmmFileFmt): - single_profile = False + single = False class DnaProfileHmmFileFmt(ProfileHmmFileFmt): @@ -83,11 +83,11 @@ class DnaProfileHmmFileFmt(ProfileHmmFileFmt): class DnaSingleProfileHmmFileFmt(DnaProfileHmmFileFmt): - single_profile = True + single = True class DnaMultipleProfileHmmFileFmt(DnaProfileHmmFileFmt): - single_profile = False + single = False class RnaProfileHmmFileFmt(ProfileHmmFileFmt): @@ -95,11 +95,11 @@ class RnaProfileHmmFileFmt(ProfileHmmFileFmt): class RnaSingleProfileHmmFileFmt(RnaProfileHmmFileFmt): - single_profile = True + single = True class RnaMultipleProfileHmmFileFmt(RnaProfileHmmFileFmt): - single_profile = False + single = False class ProteinSingleProfileHmmDirectoryFmt(model.DirectoryFormat): From 054d5da959252c2014275e489fb9a713fb961502 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Thu, 20 Jun 2024 10:13:44 +0200 Subject: [PATCH 26/28] change format names from D/Rna to D/RNA --- q2_types/profile_hmms/__init__.py | 16 +++++----- q2_types/profile_hmms/_format.py | 36 +++++++++++----------- q2_types/profile_hmms/_type.py | 16 +++++----- q2_types/profile_hmms/tests/test_format.py | 34 ++++++++++---------- q2_types/profile_hmms/tests/test_type.py | 16 +++++----- 5 files changed, 59 insertions(+), 59 deletions(-) diff --git a/q2_types/profile_hmms/__init__.py b/q2_types/profile_hmms/__init__.py index 0ce79c71..af6e8488 100644 --- a/q2_types/profile_hmms/__init__.py +++ b/q2_types/profile_hmms/__init__.py @@ -9,10 +9,10 @@ PressedProfileHmmsDirectoryFmt, ProteinSingleProfileHmmDirectoryFmt, ProteinMultipleProfileHmmDirectoryFmt, - DnaSingleProfileHmmDirectoryFmt, - DnaMultipleProfileHmmDirectoryFmt, - RnaSingleProfileHmmDirectoryFmt, - RnaMultipleProfileHmmDirectoryFmt + 
DNASingleProfileHmmDirectoryFmt, + DNAMultipleProfileHmmDirectoryFmt, + RNASingleProfileHmmDirectoryFmt, + RNAMultipleProfileHmmDirectoryFmt ) from ._type import ( ProfileHMM, @@ -27,10 +27,10 @@ "ProfileHmmFileFmt", "ProteinSingleProfileHmmDirectoryFmt", "ProteinMultipleProfileHmmDirectoryFmt", - "DnaSingleProfileHmmDirectoryFmt", - "DnaMultipleProfileHmmDirectoryFmt", - "RnaSingleProfileHmmDirectoryFmt", - "RnaMultipleProfileHmmDirectoryFmt", + "DNASingleProfileHmmDirectoryFmt", + "DNAMultipleProfileHmmDirectoryFmt", + "RNASingleProfileHmmDirectoryFmt", + "RNAMultipleProfileHmmDirectoryFmt", "ProfileHMM", "SingleProtein", "SingleDNA", "SingleRNA", "MultipleProtein", "MultipleDNA", "MultipleRNA", diff --git a/q2_types/profile_hmms/_format.py b/q2_types/profile_hmms/_format.py index 825a2b5c..d1126640 100644 --- a/q2_types/profile_hmms/_format.py +++ b/q2_types/profile_hmms/_format.py @@ -78,27 +78,27 @@ class ProteinMultipleProfileHmmFileFmt(ProteinProfileHmmFileFmt): single = False -class DnaProfileHmmFileFmt(ProfileHmmFileFmt): +class DNAProfileHmmFileFmt(ProfileHmmFileFmt): alphabet = "dna" -class DnaSingleProfileHmmFileFmt(DnaProfileHmmFileFmt): +class DNASingleProfileHmmFileFmt(DNAProfileHmmFileFmt): single = True -class DnaMultipleProfileHmmFileFmt(DnaProfileHmmFileFmt): +class DNAMultipleProfileHmmFileFmt(DNAProfileHmmFileFmt): single = False -class RnaProfileHmmFileFmt(ProfileHmmFileFmt): +class RNAProfileHmmFileFmt(ProfileHmmFileFmt): alphabet = "rna" -class RnaSingleProfileHmmFileFmt(RnaProfileHmmFileFmt): +class RNASingleProfileHmmFileFmt(RNAProfileHmmFileFmt): single = True -class RnaMultipleProfileHmmFileFmt(RnaProfileHmmFileFmt): +class RNAMultipleProfileHmmFileFmt(RNAProfileHmmFileFmt): single = False @@ -110,28 +110,28 @@ class ProteinMultipleProfileHmmDirectoryFmt(model.DirectoryFormat): profiles = model.File(r'.*\.hmm', format=ProteinMultipleProfileHmmFileFmt) -class DnaSingleProfileHmmDirectoryFmt(model.DirectoryFormat): - profile = 
model.File(r'.*\.hmm', format=DnaSingleProfileHmmFileFmt) +class DNASingleProfileHmmDirectoryFmt(model.DirectoryFormat): + profile = model.File(r'.*\.hmm', format=DNASingleProfileHmmFileFmt) -class DnaMultipleProfileHmmDirectoryFmt(model.DirectoryFormat): - profiles = model.File(r'.*\.hmm', format=DnaMultipleProfileHmmFileFmt) +class DNAMultipleProfileHmmDirectoryFmt(model.DirectoryFormat): + profiles = model.File(r'.*\.hmm', format=DNAMultipleProfileHmmFileFmt) -class RnaSingleProfileHmmDirectoryFmt(model.DirectoryFormat): - profile = model.File(r'.*\.hmm', format=RnaSingleProfileHmmFileFmt) +class RNASingleProfileHmmDirectoryFmt(model.DirectoryFormat): + profile = model.File(r'.*\.hmm', format=RNASingleProfileHmmFileFmt) -class RnaMultipleProfileHmmDirectoryFmt(model.DirectoryFormat): - profiles = model.File(r'.*\.hmm', format=RnaMultipleProfileHmmFileFmt) +class RNAMultipleProfileHmmDirectoryFmt(model.DirectoryFormat): + profiles = model.File(r'.*\.hmm', format=RNAMultipleProfileHmmFileFmt) plugin.register_formats( PressedProfileHmmsDirectoryFmt, ProteinSingleProfileHmmDirectoryFmt, ProteinMultipleProfileHmmDirectoryFmt, - DnaSingleProfileHmmDirectoryFmt, - DnaMultipleProfileHmmDirectoryFmt, - RnaSingleProfileHmmDirectoryFmt, - RnaMultipleProfileHmmDirectoryFmt + DNASingleProfileHmmDirectoryFmt, + DNAMultipleProfileHmmDirectoryFmt, + RNASingleProfileHmmDirectoryFmt, + RNAMultipleProfileHmmDirectoryFmt ) diff --git a/q2_types/profile_hmms/_type.py b/q2_types/profile_hmms/_type.py index f34e5e7b..98d05e25 100644 --- a/q2_types/profile_hmms/_type.py +++ b/q2_types/profile_hmms/_type.py @@ -9,10 +9,10 @@ from q2_types.plugin_setup import plugin from q2_types.profile_hmms._format import ( PressedProfileHmmsDirectoryFmt, - DnaSingleProfileHmmDirectoryFmt, - DnaMultipleProfileHmmDirectoryFmt, - RnaSingleProfileHmmDirectoryFmt, - RnaMultipleProfileHmmDirectoryFmt, + DNASingleProfileHmmDirectoryFmt, + DNAMultipleProfileHmmDirectoryFmt, + RNASingleProfileHmmDirectoryFmt, 
+ RNAMultipleProfileHmmDirectoryFmt, ProteinSingleProfileHmmDirectoryFmt, ProteinMultipleProfileHmmDirectoryFmt ) @@ -92,7 +92,7 @@ plugin.register_artifact_class( ProfileHMM[SingleDNA], - directory_format=DnaSingleProfileHmmDirectoryFmt, + directory_format=DNASingleProfileHmmDirectoryFmt, description=( "One single profile Hidden Markov Model representing a group " "of related DNA sequences." @@ -101,7 +101,7 @@ plugin.register_artifact_class( ProfileHMM[SingleRNA], - directory_format=RnaSingleProfileHmmDirectoryFmt, + directory_format=RNASingleProfileHmmDirectoryFmt, description=( "One single profile Hidden Markov Model representing a group " "of related RNA sequences." @@ -119,7 +119,7 @@ plugin.register_artifact_class( ProfileHMM[MultipleDNA], - directory_format=DnaMultipleProfileHmmDirectoryFmt, + directory_format=DNAMultipleProfileHmmDirectoryFmt, description=( "A collection of profile Hidden Markov Models, " "each representing a group of related DNA sequences." @@ -128,7 +128,7 @@ plugin.register_artifact_class( ProfileHMM[MultipleRNA], - directory_format=RnaMultipleProfileHmmDirectoryFmt, + directory_format=RNAMultipleProfileHmmDirectoryFmt, description=( "A collection of profile Hidden Markov Models, " "each representing a group of related RNA sequences." 
diff --git a/q2_types/profile_hmms/tests/test_format.py b/q2_types/profile_hmms/tests/test_format.py index bed78da5..b64e33c1 100644 --- a/q2_types/profile_hmms/tests/test_format.py +++ b/q2_types/profile_hmms/tests/test_format.py @@ -10,10 +10,10 @@ PressedProfileHmmsDirectoryFmt, ProteinMultipleProfileHmmFileFmt, ProteinSingleProfileHmmFileFmt, - RnaMultipleProfileHmmFileFmt, - RnaSingleProfileHmmFileFmt, - DnaMultipleProfileHmmFileFmt, - DnaSingleProfileHmmFileFmt + RNAMultipleProfileHmmFileFmt, + RNASingleProfileHmmFileFmt, + DNAMultipleProfileHmmFileFmt, + DNASingleProfileHmmFileFmt ) from qiime2.plugin import ValidationError @@ -33,14 +33,14 @@ def test_ProteinSingleProfileHmmFileFmt_valid(self): ) fmt.validate() - def test_DnaSingleProfileHmmFileFmt_valid(self): - fmt = DnaSingleProfileHmmFileFmt( + def test_DNASingleProfileHmmFileFmt_valid(self): + fmt = DNASingleProfileHmmFileFmt( self.get_data_path("hmms/dna.hmm"), "r", ) fmt.validate() - def test_RnaSingleProfileHmmFileFmt_valid(self): - fmt = RnaSingleProfileHmmFileFmt( + def test_RNASingleProfileHmmFileFmt_valid(self): + fmt = RNASingleProfileHmmFileFmt( self.get_data_path("hmms/rna.hmm"), "r" ) fmt.validate() @@ -55,9 +55,9 @@ def test_ProteinSingleProfileHmmFileFmt_invalid_alph(self): ): fmt.validate() - def test_DnaSingleProfileHmmFileFmt_invalid_alph(self): + def test_DNASingleProfileHmmFileFmt_invalid_alph(self): for typ in ["rna", "amino"]: - fmt = DnaSingleProfileHmmFileFmt( + fmt = DNASingleProfileHmmFileFmt( self.get_data_path(f"hmms/{typ}.hmm"), "r" ) with self.assertRaisesRegex( @@ -65,9 +65,9 @@ def test_DnaSingleProfileHmmFileFmt_invalid_alph(self): ): fmt.validate() - def test_RnaSingleProfileHmmFileFmt_invalid_alph(self): + def test_RNASingleProfileHmmFileFmt_invalid_alph(self): for typ in ["dna", "amino"]: - fmt = RnaSingleProfileHmmFileFmt( + fmt = RNASingleProfileHmmFileFmt( self.get_data_path(f"hmms/{typ}.hmm"), "r" ) with self.assertRaisesRegex( @@ -90,14 +90,14 @@ def 
test_ProteinMultipleProfileHmmFileFmt_valid(self): ) fmt.validate() - def test_DnaMultipleProfileHmmFileFmt_valid(self): - fmt = DnaMultipleProfileHmmFileFmt( + def test_DNAMultipleProfileHmmFileFmt_valid(self): + fmt = DNAMultipleProfileHmmFileFmt( self.get_data_path("hmms/2_dna.hmm"), "r" ) fmt.validate() - def test_RnaMultipleProfileHmmFileFmt_valid(self): - fmt = RnaMultipleProfileHmmFileFmt( + def test_RNAMultipleProfileHmmFileFmt_valid(self): + fmt = RNAMultipleProfileHmmFileFmt( self.get_data_path("hmms/2_rna.hmm"), "r" ) fmt.validate() @@ -112,7 +112,7 @@ def test_mixed_hmm_profiles_invalid_1(self): fmt.validate() def test_mixed_hmm_profiles_invalid_2(self): - fmt = DnaMultipleProfileHmmFileFmt( + fmt = DNAMultipleProfileHmmFileFmt( self.get_data_path("hmms/rna_dna.hmm"), 'r' ) with self.assertRaisesRegex( diff --git a/q2_types/profile_hmms/tests/test_type.py b/q2_types/profile_hmms/tests/test_type.py index 4f6640e7..dd4c9b29 100644 --- a/q2_types/profile_hmms/tests/test_type.py +++ b/q2_types/profile_hmms/tests/test_type.py @@ -12,10 +12,10 @@ MultipleProtein, MultipleDNA, MultipleRNA, PressedProtein, PressedDNA, PressedRNA, PressedProfileHmmsDirectoryFmt, - DnaSingleProfileHmmDirectoryFmt, - DnaMultipleProfileHmmDirectoryFmt, - RnaSingleProfileHmmDirectoryFmt, - RnaMultipleProfileHmmDirectoryFmt, + DNASingleProfileHmmDirectoryFmt, + DNAMultipleProfileHmmDirectoryFmt, + RNASingleProfileHmmDirectoryFmt, + RNAMultipleProfileHmmDirectoryFmt, ProteinSingleProfileHmmDirectoryFmt, ProteinMultipleProfileHmmDirectoryFmt ) @@ -34,12 +34,12 @@ def test_SingleAmino_semantic_type_registered_to_DirFmt(self): def test_SingleDNA_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - ProfileHMM[SingleDNA], DnaSingleProfileHmmDirectoryFmt + ProfileHMM[SingleDNA], DNASingleProfileHmmDirectoryFmt ) def test_SingleRNA_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - ProfileHMM[SingleRNA], 
RnaSingleProfileHmmDirectoryFmt + ProfileHMM[SingleRNA], RNASingleProfileHmmDirectoryFmt ) def test_MultipleAmino_semantic_type_registered_to_DirFmt(self): @@ -49,12 +49,12 @@ def test_MultipleAmino_semantic_type_registered_to_DirFmt(self): def test_MultipleDNA_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - ProfileHMM[MultipleDNA], DnaMultipleProfileHmmDirectoryFmt + ProfileHMM[MultipleDNA], DNAMultipleProfileHmmDirectoryFmt ) def test_MultipleRNA_semantic_type_registered_to_DirFmt(self): self.assertSemanticTypeRegisteredToFormat( - ProfileHMM[MultipleRNA], RnaMultipleProfileHmmDirectoryFmt + ProfileHMM[MultipleRNA], RNAMultipleProfileHmmDirectoryFmt ) def test_MultipleAminoPressed_semantic_type_registered_to_DirFmt(self): From 24f148a917e989d430b17a613cf5a8fefa6b9301 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Fri, 21 Jun 2024 15:06:38 +0200 Subject: [PATCH 27/28] update mixed_hmm_profiles tests compatible with pyhmmer 0.10.13 --- q2_types/profile_hmms/_format.py | 8 +------- q2_types/profile_hmms/tests/test_format.py | 7 ++++--- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/q2_types/profile_hmms/_format.py b/q2_types/profile_hmms/_format.py index d1126640..bd739143 100644 --- a/q2_types/profile_hmms/_format.py +++ b/q2_types/profile_hmms/_format.py @@ -42,13 +42,7 @@ def _validate_(self, level: str): tolerance = 0.0001 with HMMFile(str(self)) as hmm_file: - try: - hmm_profiles = list(hmm_file) - except TypeError as e: - raise ValidationError( - "Found profiles with different alphabets.\n" - f"Printing pyhmmer error message: {e}" - ) + hmm_profiles = list(hmm_file) if len(hmm_profiles) > 1 and self.single: raise ValidationError( diff --git a/q2_types/profile_hmms/tests/test_format.py b/q2_types/profile_hmms/tests/test_format.py index b64e33c1..65bc4ebf 100644 --- a/q2_types/profile_hmms/tests/test_format.py +++ b/q2_types/profile_hmms/tests/test_format.py @@ -5,6 +5,7 @@ # # The full license is 
in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +import pyhmmer from qiime2.plugin.testing import TestPluginBase from q2_types.profile_hmms._format import ( PressedProfileHmmsDirectoryFmt, @@ -107,15 +108,15 @@ def test_mixed_hmm_profiles_invalid_1(self): self.get_data_path("hmms/amino_dna.hmm"), 'r' ) with self.assertRaisesRegex( - ValidationError, "Found profiles with different alphabets." + pyhmmer.errors.AlphabetMismatch, "Expected amino alphabet" ): fmt.validate() def test_mixed_hmm_profiles_invalid_2(self): - fmt = DNAMultipleProfileHmmFileFmt( + fmt = RNAMultipleProfileHmmFileFmt( self.get_data_path("hmms/rna_dna.hmm"), 'r' ) with self.assertRaisesRegex( - ValidationError, "Found profiles with different alphabets." + pyhmmer.errors.AlphabetMismatch, "Expected RNA alphabet" ): fmt.validate() From ff5e807e43e27defcfec83a882692c235373b763 Mon Sep 17 00:00:00 2001 From: Santiago Castro Dau Date: Tue, 25 Jun 2024 13:44:41 +0200 Subject: [PATCH 28/28] catch AlphabetMismatch as ValidationError --- q2_types/profile_hmms/_format.py | 9 ++++++++- q2_types/profile_hmms/tests/test_format.py | 7 ++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/q2_types/profile_hmms/_format.py b/q2_types/profile_hmms/_format.py index bd739143..3814a172 100644 --- a/q2_types/profile_hmms/_format.py +++ b/q2_types/profile_hmms/_format.py @@ -6,6 +6,7 @@ # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- from pyhmmer.plan7 import HMMFile +from pyhmmer.errors import AlphabetMismatch from qiime2.plugin import model from qiime2.core.exceptions import ValidationError from q2_types.plugin_setup import plugin @@ -42,7 +43,13 @@ def _validate_(self, level: str): tolerance = 0.0001 with HMMFile(str(self)) as hmm_file: - hmm_profiles = list(hmm_file) + try: + hmm_profiles = list(hmm_file) + except AlphabetMismatch: + raise ValidationError( + "Found profiles with alphabet different from " + f"'{self.alphabet}'" + ) if len(hmm_profiles) > 1 and self.single: raise ValidationError( diff --git a/q2_types/profile_hmms/tests/test_format.py b/q2_types/profile_hmms/tests/test_format.py index 65bc4ebf..6ea9cfb9 100644 --- a/q2_types/profile_hmms/tests/test_format.py +++ b/q2_types/profile_hmms/tests/test_format.py @@ -5,7 +5,6 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -import pyhmmer from qiime2.plugin.testing import TestPluginBase from q2_types.profile_hmms._format import ( PressedProfileHmmsDirectoryFmt, @@ -108,7 +107,8 @@ def test_mixed_hmm_profiles_invalid_1(self): self.get_data_path("hmms/amino_dna.hmm"), 'r' ) with self.assertRaisesRegex( - pyhmmer.errors.AlphabetMismatch, "Expected amino alphabet" + ValidationError, + "Found profiles with alphabet different from 'amino'" ): fmt.validate() @@ -117,6 +117,7 @@ def test_mixed_hmm_profiles_invalid_2(self): self.get_data_path("hmms/rna_dna.hmm"), 'r' ) with self.assertRaisesRegex( - pyhmmer.errors.AlphabetMismatch, "Expected RNA alphabet" + ValidationError, + "Found profiles with alphabet different from 'rna'" ): fmt.validate()