-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfoliavalidator.nf
executable file
·104 lines (83 loc) · 2.7 KB
/
foliavalidator.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env nextflow
/*
vim: syntax=groovy
-*- mode: groovy;-*-
*/
// Startup banner.
log.info "--------------------------"
log.info "FoLiA Validator Pipeline"
log.info "--------------------------"

// Parameter defaults; each one can be overridden on the command line
// as --<name> VALUE.
def env = System.getenv()
params.extension = "folia.xml"
// Reuse the Python virtual environment that is active in the calling shell,
// if any; an empty string means "do not activate anything".
params.virtualenv = env.containsKey('VIRTUAL_ENV') ? env['VIRTUAL_ENV'] : ""
params.outreport = "./foliavalidation.report"
params.outsummary = "./foliavalidation.summary"

// Print the usage text and stop with exit code 2 when --help is given or
// the mandatory --inputdir parameter is missing.
if (params.containsKey('help') || !params.containsKey('inputdir')) {
    log.info "Usage:"
    log.info " foliavalidator.nf --inputdir DIRECTORY [OPTIONS]"
    log.info ""
    log.info "Options:"
    log.info " --inputdir DIRECTORY Path to the corpus directory"
    log.info " --extension EXTENSION Extension of FoLiA documents (default: folia.xml)"
    log.info " --virtualenv PATH Path to Python Virtual Environment to load (usually path to LaMachine)"
    // These two options were always accepted but never listed in the usage text.
    log.info " --outreport FILE Path of the combined validation report to write (default: ./foliavalidation.report)"
    log.info " --outsummary FILE Path of the one-line-per-document summary to write (default: ./foliavalidation.summary)"
    exit 2
}
// Glob for the input documents: any file below the input directory whose
// name ends in ".<extension>" (the `**` is meant to descend into
// subdirectories).
def docGlob = params.inputdir + "/**." + params.extension
documents = Channel.fromPath(docGlob)

// Channels that the processes below write their outputs into.
summary = Channel.create()
report = Channel.create()
validationresults = Channel.create()
// Validate a single FoLiA document and write a per-document result file
// named "<doc>.foliavalidator".
//
// The result file contains, in order: a timestamp header, the md5 checksum
// of the document, anything the validator printed on stderr, and a final
// status line "<path>\tOK" or "<path>\tFAILED". The summary process relies
// on that status line being the LAST line of the file.
process foliavalidator {
    // Kept for backward compatibility: a task that still ends with status 1
    // is not treated as a pipeline error.
    validExitStatus 0,1

    input:
    file doc from documents
    val virtualenv from params.virtualenv

    output:
    file "*.foliavalidator" into validationresults

    // Notes on the script below:
    //  * `set +u` / `set -u` brackets the virtualenv activation because
    //    activate scripts may reference unset variables.
    //  * The validator is invoked directly as the `if` condition. Task
    //    scripts run under `bash -e` by default, so running it as a plain
    //    command and inspecting `\$?` afterwards would abort the script on
    //    failure before the FAILED status line could be written.
    //  * On failure the collected output is echoed to stderr so it shows up
    //    in the task's error log.
    script:
    """
    set +u
    if [ ! -z "${virtualenv}" ]; then
        source "${virtualenv}/bin/activate"
    fi
    set -u

    date=\$(date +"%Y-%m-%d %H:%M:%S")
    echo "--------------- \$date ---------------" > "${doc}.foliavalidator"
    echo "md5 checksum: "\$(md5sum "${doc}") >> "${doc}.foliavalidator"
    if foliavalidator "${doc}" 2>> "${doc}.foliavalidator"; then
        echo \$(readlink "${doc}")"\tOK" >> "${doc}.foliavalidator"
    else
        cat "${doc}.foliavalidator" >&2
        echo \$(readlink "${doc}")"\tFAILED" >> "${doc}.foliavalidator"
    fi
    """
}
// Split the per-document validation results into two channels: one is
// consumed by the `report` process, the other by the `summary` process.
validationresults_report = Channel.create()
validationresults_summary = Channel.create()
validationresults.into { validationresults_report; validationresults_summary }
// Concatenate the complete contents of all per-document result files into
// a single file, foliavalidation.report.
process report {
    input:
    // collect() hands over all result files at once, so this runs a single time.
    file "*.foliavalidator" from validationresults_report.collect()

    output:
    file "foliavalidation.report" into report

    // An explicit "." starting point is passed to find: omitting it is a
    // GNU extension that BSD/macOS find rejects. Output on GNU find is
    // unchanged.
    script:
    """
    find . -name "*.foliavalidator" | xargs -n 1 cat > foliavalidation.report
    """
}
// Build a summary file, foliavalidation.summary, holding only the last line
// of every per-document result file (the status line written by the
// foliavalidator process).
process summary {
    input:
    // collect() hands over all result files at once, so this runs a single time.
    file "*.foliavalidator" from validationresults_summary.collect()

    output:
    file "foliavalidation.summary" into summary

    // An explicit "." starting point is passed to find: omitting it is a
    // GNU extension that BSD/macOS find rejects. Output on GNU find is
    // unchanged.
    script:
    """
    find . -name "*.foliavalidator" | xargs -n 1 tail -n 1 > foliavalidation.summary
    """
}
//validationresults.subscribe { print it.text }

// Store the full report under the path given by --outreport.
report.collectFile(name: params.outreport)

// Store the summary under the path given by --outsummary, then print its
// contents.
def summaryFile = summary.collectFile(name: params.outsummary)
summaryFile.println { it.text }