forked from suda-huanglab/circlehunter
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfig-template.yaml
83 lines (75 loc) · 2.62 KB
/
config-template.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
genome:
  # genome size; use hs for human (check the MACS2 documentation for more details)
  size: hs
  # path to the genome reference fasta
  fasta: /share/references/hg38/fasta/hg38.fa
  # path to the genome bwa index
  bwa_index: /share/references/hg38/bwa_index/hg38.fa
  # path to the sorted genome blacklist, downloaded from ENCODE
  blacklist: /share/references/hg38/blacklist/hg38.blacklist.sorted.bed
  # gene annotation in bgzip-compressed bed format; must be indexed with tabix,
  # downloaded from UCSC
  refgene: /share/references/hg38/refgene_anno/hg38.refGene.gene.bed.gz
  # NOTE(review): this template also carried the comment
  # "path to reference genome size file" with no matching key;
  # confirm whether a genome size file entry is missing here
  # path to the bed file that you want to use to mock ecDNA
  mock_regions: /share/references/hg38/1000_Genomes_Project/20160622.allChr.pilot_mask.bed
# path to the ATAC-Seq adapter file (the example value points at a .fa file)
adapter: /path/to/resource/adapter.fa
# read filter parameters
params:
  # minimum mapping quality (mapq) for a read
  mapq: 10
  # flag include
  # NOTE(review): presumably SAM FLAG bits a read must have set
  # (1 would be "read paired") - confirm against the workflow
  include: 1
  # flag exclude
  # NOTE(review): presumably SAM FLAG bits a read must not have set
  # (1548 would be 4 + 8 + 512 + 1024) - confirm against the workflow
  exclude: 1548
  # maximum mismatch rate
  mismatch: 0.05
# minimum depth for considering a junction coverage, optional
# NOTE(review): left at the top level as found; confirm whether the workflow
# reads depth and limit from here or from under params
depth: 10
# maximum number of circles to output
limit: 1000
# path to the workspace; all final and intermediate output will be written to this directory
workspace: /path/to/circlehunter/workspace
# sample list: sample id -> input type (fastq or bam) -> library id
samples:
  GSM3832731:  # sample id
    fastq:
      SRR8236755:  # library id
        # path to the fastq of read1
        fq1: /path/to/fastq/PRJNA506071/GSM3832731/SRR8236755/SRR8236755_1.fastq.gz
        # path to the fastq of read2
        fq2: /path/to/fastq/PRJNA506071/GSM3832731/SRR8236755/SRR8236755_2.fastq.gz
      # specify another library of the same sample like the one above if needed;
      # circlehunter will merge them automatically for you
  GSM3832732:
    bam:
      # bam file as input; circlehunter will use the aligned and sorted bam file instead
      SRR8236756: /path/to/bam/PRJNA506071/GSM3832732/SRR8236756/SRR8236756.sorted.bam
      # specify another library of the same sample like the one above if needed;
      # circlehunter will merge them automatically for you
# ## the following config is only used in simulation
# simulation:
#   # random seed
#   seed: 1665331200
#   # number of ecDNA mixed in one sample
#   num: 2
#   # number of times to repeat the benchmark for one sample
#   repeat: 5
#   # simulated reads fragment size
#   fragment_size_mean: 250
#   fragment_size_std: 180
#   # read coverage params
#   read_depth_test_length: 100
#   read_depth_ranges:
#     - 10
#     - 20
#     - 30
#     - 40
#     - 50
#   # read length params
#   read_length_test_depth: 30
#   read_length_ranges:
#     - 35
#     - 50
#     - 75
#     - 100