Skip to content

Commit

Permalink
TEST: updating transformer tests that call format validation (#335)
Browse files Browse the repository at this point in the history
  • Loading branch information
lizgehret authored May 28, 2024
1 parent fa95a39 commit 18460e4
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 26 deletions.
4 changes: 4 additions & 0 deletions q2_types/feature_data/_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,10 @@ def _fastaformats_to_series(ff, constructor=skbio.DNA, lowercase=False):
for sequence in _read_from_fasta(str(ff), constructor,
lowercase=lowercase):
id_ = sequence.metadata['id']
# NOTE: this check may no longer have any effect because of format
# validation, but it is left here as a safeguard; we may want to
# examine/address it later.
# Relevant PR associated with this change:
# https://github.com/qiime2/q2-types/pull/335
if id_ in data:
raise ValueError("FASTA format sequence IDs must be unique. The "
"following ID was found more than once: %s."
Expand Down
51 changes: 31 additions & 20 deletions q2_types/feature_data/tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,8 +679,10 @@ def test_series_to_dnafasta_format(self):

def test_dnafasta_format_with_duplicate_ids_to_series(self):
with self.assertRaisesRegex(ValueError, 'unique.*SEQUENCE1'):
self.transform_format(DNAFASTAFormat, pd.Series,
'dna-sequences-with-duplicate-ids.fasta')
transformer = self.get_transformer(DNAFASTAFormat, pd.Series)
input = self.get_data_path(
'dna-sequences-with-duplicate-ids.fasta')
transformer(input)

def test_dnafasta_format_to_metadata(self):
_, obs = self.transform_format(DNAFASTAFormat, qiime2.Metadata,
Expand Down Expand Up @@ -901,8 +903,10 @@ def test_series_to_rnafasta_format(self):

def test_rnafasta_format_with_duplicate_ids_to_series(self):
with self.assertRaisesRegex(ValueError, 'unique.*RNASEQUENCE1'):
self.transform_format(RNAFASTAFormat, pd.Series,
'rna-sequences-with-duplicate-ids.fasta')
transformer = self.get_transformer(RNAFASTAFormat, pd.Series)
input = self.get_data_path(
'rna-sequences-with-duplicate-ids.fasta')
transformer(input)

def test_rnafasta_format_to_metadata(self):
_, obs = self.transform_format(RNAFASTAFormat, qiime2.Metadata,
Expand Down Expand Up @@ -1011,9 +1015,11 @@ def test_mixed_case_dna_fasta_format_to_series(self):

def test_mixed_case_dna_fasta_format_with_duplicate_ids_to_series(self):
with self.assertRaisesRegex(ValueError, 'unique.*SEQUENCE1'):
self.transform_format(
MixedCaseDNAFASTAFormat, pd.Series,
'dna-sequences-mixed-case-with-duplicate-ids.fasta')
transformer = self.get_transformer(
MixedCaseDNAFASTAFormat, pd.Series)
input = self.get_data_path(
'dna-sequences-mixed-case-with-duplicate-ids.fasta')
transformer(input)

def test_mixed_case_dna_fasta_format_to_metadata(self):
_, obs = self.transform_format(MixedCaseDNAFASTAFormat,
Expand Down Expand Up @@ -1077,9 +1083,11 @@ def test_mixed_case_rna_fasta_format_to_series(self):

def test_mixed_case_rna_fasta_format_with_duplicate_ids_to_series(self):
with self.assertRaisesRegex(ValueError, 'unique.*SEQUENCE1'):
self.transform_format(
MixedCaseRNAFASTAFormat, pd.Series,
'rna-sequences-mixed-case-with-duplicate-ids.fasta')
transformer = self.get_transformer(
MixedCaseRNAFASTAFormat, pd.Series)
input = self.get_data_path(
'rna-sequences-mixed-case-with-duplicate-ids.fasta')
transformer(input)

def test_mixed_case_rna_fasta_format_to_metadata(self):
_, obs = self.transform_format(MixedCaseRNAFASTAFormat,
Expand Down Expand Up @@ -1145,9 +1153,11 @@ def test_mixed_case_aln_dna_fasta_format_to_series(self):

def test_mixed_case_aln_dna_fasta_format_w_duplicate_ids_to_series(self):
with self.assertRaisesRegex(ValueError, 'unique.*SEQUENCE1'):
self.transform_format(
MixedCaseAlignedDNAFASTAFormat, pd.Series,
'dna-sequences-mixed-case-with-duplicate-ids.fasta')
transformer = self.get_transformer(
MixedCaseAlignedDNAFASTAFormat, pd.Series)
input = self.get_data_path(
'dna-sequences-mixed-case-with-duplicate-ids.fasta')
transformer(input)

def test_mixed_case_aln_dna_fasta_format_to_metadata(self):
_, obs = self.transform_format(
Expand Down Expand Up @@ -1215,9 +1225,11 @@ def test_mixed_case_aln_rna_fasta_format_to_series(self):

def test_mixed_case_aln_rna_fasta_format_w_duplicate_ids_to_series(self):
with self.assertRaisesRegex(ValueError, 'unique.*SEQUENCE1'):
self.transform_format(
MixedCaseAlignedRNAFASTAFormat, pd.Series,
'rna-sequences-mixed-case-with-duplicate-ids.fasta')
transformer = self.get_transformer(
MixedCaseAlignedRNAFASTAFormat, pd.Series)
input = self.get_data_path(
'rna-sequences-mixed-case-with-duplicate-ids.fasta')
transformer(input)

def test_mixed_case_aln_rna_fasta_format_to_metadata(self):
_, obs = self.transform_format(
Expand Down Expand Up @@ -1402,10 +1414,9 @@ def test_series_to_proteinfasta_format(self):

def test_proteinfasta_format_with_duplicate_ids_to_series(self):
with self.assertRaisesRegex(ValueError, 'unique.*sequence1'):
self.transform_format(
ProteinFASTAFormat,
pd.Series,
'protein-sequences-duplicate-ids.fasta')
transformer = self.get_transformer(ProteinFASTAFormat, pd.Series)
input = self.get_data_path('protein-sequences-duplicate-ids.fasta')
transformer(input)

def test_proteinfasta_format_to_metadata(self):
_, obs = self.transform_format(ProteinFASTAFormat, qiime2.Metadata,
Expand Down
7 changes: 4 additions & 3 deletions q2_types/metadata/tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ def test_metadata_format_to_metadata(self):
self.assertEqual(obs, exp_md)

def test_non_metadata(self):
filename = 'invalid-metadata-1.tsv'
with self.assertRaisesRegex(MetadataFileError,
"column name 'bad-id-label'"):
self.transform_format(ImmutableMetadataFormat, qiime2.Metadata,
filename)
transformer = self.get_transformer(
ImmutableMetadataFormat, qiime2.Metadata)
input = self.get_data_path('invalid-metadata-1.tsv')
transformer(input)

def test_metadata_to_metadata_format(self):
filename = 'metadata.tsv'
Expand Down
6 changes: 3 additions & 3 deletions q2_types/per_sample_sequences/tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def test_casava_one_eight_laneless_per_sample_dirfmt_to_slpspefd(self):

input, dirfmt = self.transform_format(
CasavaOneEightLanelessPerSampleDirFmt,
SingleLanePerSamplePairedEndFastqDirFmt, filenames=filenames
SingleLanePerSampleSingleEndFastqDirFmt, filenames=filenames
)
expected_filepaths = ['Human-Kneecap_S1_L001_R1_001.fastq.gz',
'Human-Armpit_S2_L001_R1_001.fastq.gz']
Expand Down Expand Up @@ -179,7 +179,7 @@ def test_casava_one_eight_single_lane_per_sample_dirfmt_to_slpspefdf(self):
filenames = ('Human-Kneecap_S1_L001_R1_001.fastq.gz',)
input, obs = self.transform_format(
CasavaOneEightSingleLanePerSampleDirFmt,
SingleLanePerSamplePairedEndFastqDirFmt, filenames=filenames
SingleLanePerSampleSingleEndFastqDirFmt, filenames=filenames
)

input = skbio.io.read(
Expand Down Expand Up @@ -216,7 +216,7 @@ def test_miseq_demux_dirfmt_to_slpssefdf(self):
def test_miseq_demux_dirfmt_to_slpspefdf(self):
input, obs = self.transform_format(
CasavaOneEightLanelessPerSampleDirFmt,
SingleLanePerSamplePairedEndFastqDirFmt,
SingleLanePerSampleSingleEndFastqDirFmt,
filenames=('Human-Kneecap_S1_R1_001.fastq.gz',),
)

Expand Down

0 comments on commit 18460e4

Please sign in to comment.