# example-config-pooled-feature-barcoding.yaml
# This config file describes an experiment.
# An experiment is "a collection of samples that need to be processed and
# analyzed together."
# The information under `experiment` is global; it does not change on a
# per-sample basis.
experiment:
  # Determines what image to run. Current options: 2.20.0
  bcl2fastq_version: "2.20.0"
  # Determines what image to run. Current options: 2.2.0, 3.0.2
  cellranger_version: "3.0.2"
  # sequencing_runs metadata is used to generate the experiment name.
  # Based off the `sequencing_runs` data below, our generated experiment name
  # would be "run1_himc1_032519-run2_himc2_032519".
  # NOTE(review): run 2 below is dated 2019-03-26, so its suffix would
  # presumably be 032619 rather than 032519 — confirm against the pipeline's
  # naming logic.
  # Sequencing run names are separated by '-'. We may have 1 or more sequencing
  # runs. When there is more than 1 run, we refer to them as "pooled runs".
  sequencing_runs:
    - id: "1"  # must be a string
      himc_pool: "1"  # must be a string
      date: "2019-03-25"  # YYYY-MM-DD
      bcl_file: "190325_NB501426_0271_AH2J72BGXB.tar.gz"
    - id: "2"  # must be a string
      himc_pool: "2"  # must be a string
      date: "2019-03-26"  # YYYY-MM-DD
      bcl_file: "190326_NB501426_0272_AH2J75BGXY.tar.gz"
  # NOTE(review): indentation was lost in this copy of the file; `meta` is
  # assumed to be nested under `experiment` — confirm against the pipeline's
  # config schema.
  meta:
    debug: true  # boolean; defaults to false
# `processing` may contain either:
#   * Nothing
#   * bcl2fastq
#   * mkfastq
#   * mkfastq & bcl2fastq
processing:
  mkfastq:
    samples:
      - name: 7002v3A
        index_location: SI-GA-E12
      - name: 7002v3B
        index_location: SI-GA-E11
  bcl2fastq:
    samples:
      - name: 7002v3A-A
        index_location: RPI9
      - name: 7002v3A-H
        # The `_s` suffix here is necessary so that we can accurately match the
        # index_location to a proper sequence from our
        # citeseq_sample_indices.csv file
        index_location: D709_s
      - name: 7002v3B-A
        index_location: RPI10
      - name: 7002v3B-H
        # The `_s` suffix here is necessary so that we can accurately match the
        # index_location to a proper sequence from our
        # citeseq_sample_indices.csv file
        index_location: D710_s
# Per-sample analysis jobs. Each entry under `samples` describes one job.
# NOTE(review): indentation was lost in this copy of the file. The nesting
# below is reconstructed from key names and comments; in particular the
# placement of `oligo_group_id` (under `feature_barcoding`) and `pooled_run`
# (at the sample level) is an assumption — confirm against the pipeline's
# config schema.
analyses:
  samples:
    - name: 7002v3A
      job_type: count  # may be count or vdj
      # A default chemistry of `auto` is usually sufficient to run cellranger
      # count; however, if errors are thrown, you may specify the chemistry
      # as per the 10x documentation:
      # https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count
      chemistry: auto
      reference_transcriptome:
        # may also be hg19, mm10, or vdj; see 10x-pipeline/reference_transcriptome
        # s3 bucket for currently available options for name and version. You
        # may also reference ./docs/Building_the_Pipeline.md
        name: GRCh38
        version: "1.2.0"
      target_cell_count: 10000
      feature_barcoding:
        feature_type: Custom  # other options include `Antibody Capture`
        directionality: 3p  # may be 3p or 5p
        enabled: true  # will be false if we're not performing feature barcoding
        samples:
          # We nest related CITE-seq samples under the "parent" GEX sample
          # (e.g. 7002v3A)
          - name: 7002v3A-A
          - name: 7002v3A-H
        # Refers to a specific group of oligos under the `oligo_groups` key
        oligo_group_id: "1"  # must be a string
      pooled_run: true  # will be false if we only have 1 sequencing run
    - name: 7002v3B
      job_type: count
      chemistry: auto
      reference_transcriptome:
        name: GRCh38
        version: "1.2.0"
      target_cell_count: 10000
      feature_barcoding:
        feature_type: Custom
        directionality: 3p
        enabled: true
        samples:
          - name: 7002v3B-A
          - name: 7002v3B-H
        oligo_group_id: "1"  # must be a string
      pooled_run: true
# Groups of oligos, each identified by an `id` that samples reference through
# their `oligo_group_id` value.
oligo_groups:
  - id: "1"  # must match an `oligo_group_id` value
    # Did we use Biolegend Total-Seq B or Total-Seq C antibodies?
    total_seq_b_c: false
    oligos:
      # HTO oligos must be prefixed with an 'HTO'. Our pipeline will
      # automatically prefix the ADTs with `adt_` so the ADT names should not
      # be prefixed here. These will be matched to a proper sequence from our
      # adt_hto_bc_sequences.csv file
      - CD86
      - CD274_PD-L1
      - CD26
      - CD40
      - CD3
      - CD4
      - CD8
      - CD45
      - CD19
      - CD14
      - CD33
      - CD11c
      - CD138
      - CD38
      - CD117
      - CD10
      - CD45RA
      - CD123
      - CD127
      - CD194_CCR4
      - CD16
      - CD56
      - CD25
      - CD279_PD-1
      - CD20
      - IgD
      - CD183_CXCR3
      - CD195_CCR5
      - CD196_CCR6
      - CD185_CXCR5
      - CD103
      - CD69
      - CD62L
      - CD197_CCR7
      - CD161
      - CD28
      - CD27
      - HLA-DR
      - CD1c
      - CD11b
      - CD141
      - CD66b
      - CD57
      - CD278_ICOS
      - CD24
      - CD206
      - CD169
      - CD370
      - CD314_NKG2D
      - TIGIT_VSTM3
      - CD273_PD-L2
      - CD5
      - XCR1
      - CD163
      - CD58
      - TCR_gamma_delta
      - CD272_BTLA
      - CD29
      - CD254
      - CD146
      - CD140a_PDGFRalpha
      - CD140b_PDGFRbeta
      - CD90_Thy1
      - CD105
      - CD326_Ep-CAM
      - CD44
      - CD31
      - CD134_OX40
      - CD223_LAG-3
      - CD152_CTLA-4
      - HLA-ABC
      - CD39
      - CD244_2B4
      - CD226_DNAM-1
      - CD115_CSF-1R
      - CD34
      - Podoplanin
      - CD54
      - CD9
      - FAP-IH
      - IgG4-IH
      - HTO_1
      - HTO_2
      - HTO_3
      - HTO_4
      - HTO_5
      - HTO_Mouse_A20