-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathwriter.jl
119 lines (110 loc) · 3.14 KB
/
writer.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
## GFF3 Writer
"""
    Writer{S<:TranscodingStream}

GFF3 writer. Holds the stream `output` that records are printed to.
"""
struct Writer{S<:TranscodingStream} <: BioGenerics.IO.AbstractWriter
    output::S
end
# Return the stream wrapped by `writer` (the `BioGenerics.IO.stream` accessor).
BioGenerics.IO.stream(writer::Writer) = writer.output
"""
    GFF.Writer(output::IO)

Create a data writer of the GFF file format.

If `output` is already a `TranscodingStream` it is wrapped as-is; any other `IO` is
first wrapped in a `TranscodingStreams.NoopStream`.

```julia
open(GFF.Writer, outfile) do writer
    write(writer, genome)
end
```
"""
function Writer(output::IO)
    # Both cases go through one local so the constructor can never reference a stream
    # that was not assigned (the TranscodingStream case previously used an undefined
    # variable and threw UndefVarError).
    stream = output isa TranscodingStream ? output : TranscodingStreams.NoopStream(output)
    return Writer{typeof(stream)}(stream)
end
# Print `record` in GFF3 format to the stream held by `writer`.
Base.write(writer::Writer, record::Record) = printgff(writer.output, record)
"""
    gffstring(gene::Gene)

Return the GFF3 text for `gene` as a `String` of tab-separated, newline-terminated
lines: one line per locus when `ismultilocus(gene)`, otherwise a single line.

The last column holds the attributes: every non-missing entry of the gene's row in
`parent(gene).genedata`, except `source`, `score` and `phase`, which are written in
their own columns (falling back to `"."`). Attributes are written as `key=value` and
separated by `;`; the elements of a vector-valued attribute are separated by `,`.
"""
function gffstring(gene::Gene)
    buf = IOBuffer()
    firstattribute = true
    for field in names(parent(gene).genedata)
        field in [:source, :score, :phase] && continue
        v = parent(gene).genedata[index(gene), field]
        ismissing(v) && continue
        if firstattribute
            firstattribute = false
        else
            print(buf, ";")
        end
        print(buf, field, "=")
        if v isa AbstractVector
            # Separate elements with `,` only. The `;` between attributes is written
            # above, so also emitting one after the last element produced `;;`.
            join(buf, (oneline(x) for x in v), ",")
        else
            print(buf, oneline(v))
        end
    end
    attributes = String(take!(buf))

    # A multi-locus gene yields one line per locus, all sharing the same attributes;
    # a single locus is wrapped in a tuple so both cases use the same loop.
    loci = ismultilocus(gene) ? locus(gene) : (locus(gene),)
    res = IOBuffer()
    for loc in loci
        println(res, join([parent(gene).name,
                           get(gene, :source, "."),
                           feature(gene),
                           loc.start,
                           loc.stop,
                           get(gene, :score, "."),
                           loc.strand,
                           get(gene, :phase, "."),
                           attributes], '\t'))
    end
    return String(take!(res))
end
"""
    printgff(io::IO, chr)
    printgff(path::AbstractString, chr)

Print `chr` in GFF3 format.

When a path is given, the file is opened in `"w"` mode and closed again once printing
has finished, also when printing throws.
"""
function printgff(filepath::AbstractString, chrs)
    # The do-block form of `open` closes the handle on exit, so the output is flushed
    # to disk and the file descriptor is not leaked.
    return open(filepath, "w") do io
        printgff(io, chrs)
    end
end
# A single record is printed by handing it on as a one-element vector.
function printgff(io::IO, chr::Record)
    return printgff(io, [chr])
end
# Print the records `chrs` to `io` as one GFF3 document: a header, the `gffstring` of
# every gene of every record, and a `##FASTA` section when any record has a non-empty
# sequence. The text is assembled in an in-memory buffer and written to `io` with a
# single `print`.
function printgff(io::IO, chrs::AbstractVector{Record{G}}) where G <: AbstractGene
    iobuffer = IOBuffer()
    ### Header
    # Reuse the first record's header only when it starts with the GFF3 version pragma.
    # An empty `chrs` has no header to reuse, so it gets the default pragma instead of
    # raising a BoundsError; `first` also avoids assuming 1-based indexing.
    if !isempty(chrs) && occursin(r"^##gff-version 3", first(chrs).header)
        print(iobuffer, first(chrs).header)
    else
        println(iobuffer, "##gff-version 3")
    end
    ### Body
    for chr in chrs, gene in chr.genes
        print(iobuffer, gffstring(gene))
    end
    ### Footer
    # Once any record carries sequence, every record gets a FASTA entry, with the
    # sequence wrapped at 80 characters per line.
    if any(!isempty(chr.sequence) for chr in chrs)
        println(iobuffer, "##FASTA")
        for chr in chrs
            println(iobuffer, ">", chr.name)
            for s in Iterators.partition(chr.sequence, 80)
                println(iobuffer, join(s))
            end
        end
    end
    print(io, String(take!(iobuffer)))
    return nothing
end