-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbam_parser.config
66 lines (56 loc) · 1.95 KB
/
bam_parser.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import java.util.zip.GZIPInputStream
/**
* This bam_parser namespace is used to parse the header from a BAM file.
* The information is loaded into a Map.
*/
bam_parser {
/**
 * Open a BAM file and return a reader positioned at the start of its text header.
 *
 * The file is decompressed as a gzip stream and decoded as UTF-8 text. Lines are
 * scanned until one containing '@' is found; the reader is then rewound to the
 * start of that line and advanced to the '@', so the caller's first readLine()
 * returns the header text without whatever preceded it on that line.
 *
 * @param bam_path path to the BAM file
 * @return a BufferedReader whose next character is the first '@' found;
 *         the caller is responsible for closing it
 * @throws IllegalArgumentException if the stream ends before any '@' is found
 */
get_sam_reader = { String bam_path ->
    // Read-ahead limit handed to mark(): reset() is only guaranteed to work while
    // no more than this many characters have been read since the mark.
    // NOTE(review): a first header line longer than this could make reset() fail - confirm.
    int BUFFER_SIZE = 1000

    InputStream file_stream = new FileInputStream(bam_path)
    BufferedReader reader = null
    try {
        InputStream gzip_stream = new GZIPInputStream(file_stream)
        reader = new BufferedReader(new InputStreamReader(gzip_stream, 'UTF-8'))

        int header_start_index = -1
        while (header_start_index == -1) {
            // Mark the start of each candidate line so it can be re-read from the '@'
            reader.mark(BUFFER_SIZE)
            String curr_line = reader.readLine()
            if (curr_line == null) {
                // End of stream reached: check before calling indexOf so that an
                // empty or header-less file gives a clear error instead of a
                // NullPointerException.
                throw new IllegalArgumentException("Failed to find header in BAM: ${bam_path}")
            }
            header_start_index = curr_line.indexOf('@')
        }

        // Rewind to the start of the matching line, then drop everything before '@'
        reader.reset()
        reader.skip(header_start_index)
        return reader
    } catch (Throwable e) {
        // Do not leak the file handle when the header cannot be located or read
        try {
            (reader != null ? reader : file_stream).close()
        } catch (IOException ignored) {
            // Keep the original failure as the one reported to the caller
        }
        throw e
    }
}
/**
 * Extract the TAG:VALUE fields of a read group header line.
 *
 * Every occurrence of a two-character tag (a letter followed by a letter or
 * digit), a colon, and a run of printable ASCII characters is recorded. A tag
 * that occurs more than once keeps its last value.
 *
 * @param line a header line, e.g. one starting with '@RG'
 * @return Map of tag to value
 */
parse_read_group = { String line ->
    def fields = [:]
    def field_matcher = line =~ /(?<tag>[A-Za-z][A-Za-z0-9]):(?<value>[ -~]+)/
    // Walk the matches one at a time, reading each capture by its group name
    while (field_matcher.find()) {
        fields[field_matcher.group('tag')] = field_matcher.group('value')
    }
    return fields
}
/**
 * Main parsing function for calling. Returns parsed data as a Map.
 *
 * Reads header lines (lines starting with '@') from the BAM at bam_path until
 * the first empty line, non-header line, or end of stream. Each '@RG' line is
 * parsed with bam_parser.parse_read_group and appended to the 'read_group' list.
 *
 * @param bam_path path to the BAM file
 * @return Map with key 'read_group' holding a List with one Map per '@RG' line
 *         (empty when the header has no '@RG' lines)
 */
parse_bam_header = { String bam_path ->
    def parsed_header = [:]
    parsed_header['read_group'] = []

    def sam_reader = bam_parser.get_sam_reader(bam_path)
    try {
        def line = sam_reader.readLine()
        while (line && line.startsWith('@')) {
            // Handle different header lines here
            if (line.startsWith('@RG')) {
                parsed_header['read_group'].add(bam_parser.parse_read_group(line))
            }
            line = sam_reader.readLine()
        }
    } finally {
        // Release the underlying file handle even if reading or parsing fails
        sam_reader.close()
    }
    return parsed_header
}
}