From 3e94d350bcf05a521424163a3a6170e51fe33b24 Mon Sep 17 00:00:00 2001 From: Matin Nuhamunada Date: Thu, 25 Apr 2024 04:22:34 +0000 Subject: [PATCH] chore: use genbank acc for test --- .../lactobacillus_delbruecki/samples.csv | 2 +- .../data/interim/gtdb/GCA_000056065.1.json | 22 +++ .../data/interim/gtdb/GCF_000056065.1.json | 176 ------------------ 3 files changed, 23 insertions(+), 177 deletions(-) create mode 100644 .tests/unit/gtdb_prep/expected/data/interim/gtdb/GCA_000056065.1.json delete mode 100644 .tests/unit/gtdb_prep/expected/data/interim/gtdb/GCF_000056065.1.json diff --git a/.tests/unit/gtdb_prep/data/config/lactobacillus_delbruecki/samples.csv b/.tests/unit/gtdb_prep/data/config/lactobacillus_delbruecki/samples.csv index 85cbc525..35e961e8 100644 --- a/.tests/unit/gtdb_prep/data/config/lactobacillus_delbruecki/samples.csv +++ b/.tests/unit/gtdb_prep/data/config/lactobacillus_delbruecki/samples.csv @@ -1,5 +1,5 @@ genome_id,source,organism,genus,species,strain,closest_placement_reference,input_file -GCF_000056065.1,ncbi,,,,,, +GCA_000056065.1,ncbi,,,,,, GCF_000182835.1,ncbi,,,,,, GCA_000191165.1,ncbi,,,,,, GCF_000014405.1,ncbi,,,,,, diff --git a/.tests/unit/gtdb_prep/expected/data/interim/gtdb/GCA_000056065.1.json b/.tests/unit/gtdb_prep/expected/data/interim/gtdb/GCA_000056065.1.json new file mode 100644 index 00000000..bad842ba --- /dev/null +++ b/.tests/unit/gtdb_prep/expected/data/interim/gtdb/GCA_000056065.1.json @@ -0,0 +1,22 @@ +{ + "GCA_000056065.1":{ + "assembly":"ASM5606v1", + "organism":"Lactobacillus delbrueckii subsp. bulgaricus ATCC 11842 = JCM 1002 (firmicutes)", + "genus":"Lactobacillus", + "species":"delbrueckii", + "strain":"ATCC 11842", + "tax_id":"390333", + "refseq_category":null, + "refseq":"GCF_000056065.1", + "genbank":"GCA_000056065.1", + "assembly_type":"na", + "release_type":"major", + "assembly_level":"Complete Genome", + "genome_representation":"full", + "refseq_genbank_identity":"yes", + "biosample":"SAMEA3138258", + "submitter":"Genoscope", + "date":"2006-05-26", + "BioProject":"PRJNA16871" + } +} \ No newline at end of file diff --git a/.tests/unit/gtdb_prep/expected/data/interim/gtdb/GCF_000056065.1.json b/.tests/unit/gtdb_prep/expected/data/interim/gtdb/GCF_000056065.1.json deleted file mode 100644 index 0b6d9dcc..00000000 --- a/.tests/unit/gtdb_prep/expected/data/interim/gtdb/GCF_000056065.1.json +++ /dev/null @@ -1,176 +0,0 @@ -{ - "genome_id": "GCF_000056065.1", - "gtdb_url": "https://gtdb-api.ecogenomic.org/genome/GCF_000056065.1/taxon-history", - "gtdb_release": "R214", - "gtdb_taxonomy": { - "domain": "d__Bacteria", - "phylum": "p__Bacillota", - "class": "c__Bacilli", - "order": "o__Lactobacillales", - "family": "f__Lactobacillaceae", - "genus": "g__Lactobacillus", - "species": "s__Lactobacillus delbrueckii" - }, - "metadata_url": "https://gtdb-api.ecogenomic.org/genome/GCF_000056065.1/card", - "metadata": { - "genome": { - "accession": "GCA_000056065.1", - "name": "GCF_000056065.1" - }, - "metadata_nucleotide": { - "trna_aa_count": 19, - "contig_count": 1, - "n50_contigs": 1864998, - "longest_contig": 1864998, - "scaffold_count": 1, - "n50_scaffolds": 1864998, - "longest_scaffold": 1864998, - "genome_size": 1864998, - "gc_percentage": 49.71903455124348, - "ambiguous_bases": 0 - }, - "metadata_gene": { - "checkm_completeness": "98.38", - "checkm_contamination": "0.0", - "checkm_strain_heterogeneity": "0.0", - "lsu_5s_count": "9", - "ssu_count": "9", - "lsu_23s_count": "9", - "protein_count": "1916", - "coding_density": "84.01837428243891" - }, - "metadata_ncbi": { - "ncbi_genbank_assembly_accession": "GCA_000056065.1", - "ncbi_strain_identifiers": "ATCC 11842", - "ncbi_assembly_level": "Complete Genome", - "ncbi_assembly_name": "ASM5606v1", - "ncbi_assembly_type": "na", - "ncbi_bioproject": "PRJNA224116", - "ncbi_biosample": "SAMEA3138258", - "ncbi_country": null, - "ncbi_date": "2006-05-26", - "ncbi_genome_category": null, - "ncbi_genome_representation": "full", - "ncbi_isolate": null, - "ncbi_isolation_source": null, - "ncbi_lat_lon": null, - "ncbi_molecule_count": "1", - "ncbi_cds_count": "1851", - "ncbi_refseq_category": "na", - "ncbi_seq_rel_date": "2006/05/26", - "ncbi_spanned_gaps": "0", - "ncbi_species_taxid": "1584", - "ncbi_ssu_count": "9", - "ncbi_submitter": "Genoscope", - "ncbi_taxid": "390333", - "ncbi_total_gap_length": "0", - "ncbi_translation_table": "11", - "ncbi_trna_count": "95", - "ncbi_unspanned_gaps": "0", - "ncbi_version_status": "latest", - "ncbi_wgs_master": null - }, - "metadata_type_material": { - "gtdbTypeDesignation": "type strain of subspecies", - "gtdbTypeDesignationSources": "LPSN", - "lpsnTypeDesignation": "type strain of subspecies", - "dsmzTypeDesignation": "type strain of subspecies", - "lpsnPriorityYear": 1919, - "gtdbTypeSpeciesOfGenus": false - }, - "metadataTaxonomy": { - "ncbi_taxonomy": "d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__Lactobacillus delbrueckii", - "ncbi_taxonomy_unfiltered": "d__Bacteria;x__Terrabacteria group;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__Lactobacillus delbrueckii;sb__Lactobacillus delbrueckii subsp. bulgaricus;x__Lactobacillus delbrueckii subsp. bulgaricus ATCC 11842 = JCM 1002", - "gtdb_representative": false, - "gtdb_genome_representative": "RS_GCF_001433875.1", - "ncbi_type_material_designation": "assembly from type material", - "gtdbDomain": "d__Bacteria", - "gtdbPhylum": "p__Bacillota", - "gtdbClass": "c__Bacilli", - "gtdbOrder": "o__Lactobacillales", - "gtdbFamily": "f__Lactobacillaceae", - "gtdbGenus": "g__Lactobacillus", - "gtdbSpecies": "s__Lactobacillus delbrueckii" - }, - "gtdbTypeDesignation": "type strain of subspecies", - "subunit_summary": "5S/16S/23S", - "speciesRepName": "GCA_001433875.1", - "speciesClusterCount": 255, - "lpsnUrl": "https://lpsn.dsmz.de/species/lactobacillus-delbrueckii", - "link_ncbi_taxonomy": "d__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Lactobacillaceae; g__Lactobacillus; s__Lactobacillus delbrueckii", - "link_ncbi_taxonomy_unfiltered": "d__Bacteria; x__Terrabacteria group; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Lactobacillaceae; g__Lactobacillus; s__Lactobacillus delbrueckii; sb__Lactobacillus delbrueckii subsp. bulgaricus; x__Lactobacillus delbrueckii subsp. bulgaricus ATCC 11842 = JCM 1002", - "ncbiTaxonomyFiltered": [ - { - "taxon": "d__Bacteria", - "taxonId": "2" - }, - { - "taxon": "p__Firmicutes", - "taxonId": "1239" - }, - { - "taxon": "c__Bacilli", - "taxonId": "91061" - }, - { - "taxon": "o__Lactobacillales", - "taxonId": "186826" - }, - { - "taxon": "f__Lactobacillaceae", - "taxonId": "33958" - }, - { - "taxon": "g__Lactobacillus", - "taxonId": "1578" - }, - { - "taxon": "s__Lactobacillus delbrueckii", - "taxonId": "1584" - } - ], - "ncbiTaxonomyUnfiltered": [ - { - "taxon": "d__Bacteria", - "taxonId": "2" - }, - { - "taxon": "x__Terrabacteria group", - "taxonId": "1783272" - }, - { - "taxon": "p__Firmicutes", - "taxonId": "1239" - }, - { - "taxon": "c__Bacilli", - "taxonId": "91061" - }, - { - "taxon": "o__Lactobacillales", - "taxonId": "186826" - }, - { - "taxon": "f__Lactobacillaceae", - "taxonId": "33958" - }, - { - "taxon": "g__Lactobacillus", - "taxonId": "1578" - }, - { - "taxon": "s__Lactobacillus delbrueckii", - "taxonId": "1584" - }, - { - "taxon": "sb__Lactobacillus delbrueckii subsp. bulgaricus", - "taxonId": "1585" - }, - { - "taxon": "x__Lactobacillus delbrueckii subsp. bulgaricus ATCC 11842 = JCM 1002", - "taxonId": "390333" - } - ], - "detail": "Genome found" - } -} \ No newline at end of file