generated from bokulich-lab/q2-plugin-template
-
Notifications
You must be signed in to change notification settings - Fork 14
ReferenceDB[BuscoDB]
Santiago Castro Dau edited this page Feb 29, 2024
·
2 revisions
Artifact containing a BUSCO database for one or more lineages.
class BuscoDatabaseDirFmt(model.DirectoryFormat):
    """Directory format for a BUSCO database holding one or more lineages.

    Each attribute is a path spec (a regular expression, relative to the
    root of the directory) paired with the file format used for the files
    it selects. Layout described by the path specs below:

        busco_downloads/
            file_versions.tsv
            lineages/
                <lineage>/
                    ancestral
                    ancestral_variants
                    dataset.cfg
                    lengths_cutoff
                    scores_cutoff
                    links_to_ODB*.txt
                    refseq_db.faa.gz
                    refseq_db.faa.gz.md5
                    hmms/*.hmm
                    info/ogs.id.info
                    info/species.info
                    prfl/*.prfl                  (optional)
            placement_files/                     (optional)
                list_of_reference_markers.*.txt
                mapping_taxid-lineage.*.txt
                mapping_taxids-busco_dataset_name.*.txt
                supermatrix.aln.*.faa
                tree.*.nwk
                tree_metadata.*.txt
    """

    # Per-lineage text files. The names on the left are bound, in order, to
    # one FileCollection per pattern on the right, so the two lists must be
    # kept in the same order and at the same length.
    # Every pattern is anchored with ``$`` so that a spec cannot also select
    # a longer file name that merely starts with the same text.
    (
        ancestral,
        dataset,
        lengths_cutoff,
        scores_cutoff,
        links_to_ODB,
        ancestral_variants,
        ogs_id,
        species,
        hmms,
        refseq_db_md5
    ) = [
        model.FileCollection(
            rf"busco_downloads\/lineages\/.+\/{pattern}",
            format=BuscoGenericTextFileFmt
        )
        for pattern in [
            r'ancestral$',
            r'dataset\.cfg$',
            r'lengths_cutoff$',
            r'scores_cutoff$',
            r'links_to_ODB.+\.txt$',
            r'ancestral_variants$',
            r'info\/ogs\.id\.info$',
            r'info\/species\.info$',
            r'hmms\/.+\.hmm$',
            r'refseq_db\.faa\.gz\.md5$'
        ]
    ]

    # Placement files. Optional because they are not in the virus DB.
    # Same positional pairing of names and patterns as above.
    (
        list_of_reference_markers,
        mapping_taxid_lineage,
        mapping_taxids_busco_dataset_name,
        tree,
        tree_metadata,
    ) = [
        model.FileCollection(
            rf"busco_downloads\/placement_files\/{pattern}",
            format=BuscoGenericTextFileFmt,
            optional=True
        )
        for pattern in [
            r'list_of_reference_markers\..+\.txt$',
            r'mapping_taxid-lineage\..+\.txt$',
            r'mapping_taxids-busco_dataset_name\..+\.txt$',
            r'tree\..+\.nwk$',
            r'tree_metadata\..+\.txt$',
        ]
    ]

    # Others
    # Placement-file alignment; uses a FASTA format rather than the generic
    # text format, hence declared separately from the placement files above.
    supermatrix_aln = model.FileCollection(
        r'busco_downloads\/placement_files\/supermatrix\.aln\..+\.faa$',
        format=AlignedProteinFASTAFormat,
        optional=True
    )
    # Per-lineage profile files; declared optional.
    prfls = model.FileCollection(
        r'busco_downloads\/lineages\/.+\/prfl\/.+\.prfl$',
        format=BuscoGenericTextFileFmt,
        optional=True
    )
    # Single required file directly under busco_downloads/.
    version_file = model.File(
        'busco_downloads/file_versions.tsv', format=BuscoGenericTextFileFmt
    )
    # Gzipped per-lineage reference database, handled as binary.
    # The trailing ``$`` keeps this spec from also selecting the sibling
    # ``refseq_db.faa.gz.md5`` checksum file, which belongs to
    # ``refseq_db_md5``.
    # NOTE(review): this matters if path specs are applied with prefix
    # matching (re.match-style) - confirm against the framework.
    refseq_db = model.FileCollection(
        r'busco_downloads\/lineages\/.+refseq_db\.faa\.gz$',
        format=BuscoGenericBinaryFileFmt
    )
data
└── busco_downloads
├── lineages
│ ├── lineage_1
│ │ ├── hmms
│ │ ├── lengths_cutoff
│ │ ├── ancestral
│ │ ├── ancestral_variants
│ │ ├── dataset.cfg
│ │ ├── links_to_<dataset_version>.txt
│ │ ├── refseq_db.faa.gz
│ │ ├── refseq_db.faa.gz.md5
│ │ ├── scores_cutoff
│ │ └── info
│ ⋮
│ └── lineage_i
│ ⋮
└── placement_files
- See the MAG Quality Control tutorial for a full example.
- Actions in q2-moshpit that have this semantic type: