-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdefault.properties
121 lines (80 loc) · 3 KB
/
default.properties
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Default configuration. Please create a copy of this file instead of editing it.
# At a minimum, set io.input and io.outputDirectory.
# This line will be added to the info file.
info = OPAL
# Input file or directory. Multiple entries can be separated by '|'.
io.input =
# Optional: named graph of the N-Quads input files. Has to be the same for every input.
io.inputGraph =
# Output directory.
io.outputDirectory =
# Output title, used as a prefix for the output files.
io.outputTitle = opal
# Output file format.
io.outputFormat = ttl
# Number of dataset models per output file.
io.outputSize = 10000
# Whether RDF files are written.
io.outputWrite = true
# Elasticsearch writing: on/off switch, followed by hostname, port, scheme and index name.
io.elasticsearch.write = false
io.elasticsearch.hostname = localhost
io.elasticsearch.port = 9200
io.elasticsearch.scheme = http
io.elasticsearch.index = opal
# OPAL components to run
# Data cleaning component.
# https://github.com/projekt-opal/catfish
run.catfish = true
# Language detection and geographic data.
# https://github.com/projekt-opal/metadata-refinement
run.languageDetection = true
run.geoData = true
# Metadata quality component.
# https://github.com/projekt-opal/civet
run.civet = true
# Adds an additional file containing labels.
add.labels = true
# Catfish configuration
# Removes blank nodes that are not the subject of any triple.
catfish.cleanEmptyBlankNodes = true
# Removes literals that are not empty, German or English.
# NOTE(review): presumably this refers to the language tag of title literals (see key name) - confirm.
catfish.removeNonDeEnEmptyTitles = true
# Removes datasets that do not have both a German and an English title.
# Additionally, non-German and non-English titles and descriptions are removed.
# Note: Used for EDP.
catfish.removeNonDeEn = false
# Removes triples whose object is a literal that contains no value or is unreadable.
# Also extracts the language tag and datatype if they are mistakenly inside the string.
catfish.cleanLiterals = true
# Checks dct:format and dcat:mediaType for values and creates new triples.
catfish.cleanFormats = true
# Rewrites date formats.
catfish.equalizeDateFormats = true
# If true, the first input is used to set catfish.replaceUrisCatalog.
# If false, the value of catfish.replaceUrisCatalog itself is used.
catfish.replaceUrisCatalogByFilename = true
# Rewrites URIs of datasets and distributions.
# Has to be the same for every dataset in a batch process.
# If empty, URIs are not rewritten.
# Possible values: europeandataportal, govdata, mcloud, mdm
catfish.replaceUrisCatalog =
# Civet configuration
# Whether long-running metrics should be included.
# default: false
civet.includeLongRunning = false
# Whether to log when a measurement could not be computed.
# default: true
# NOTE(review): the value set here differs from the stated default - presumably a deliberate override; confirm.
civet.logIfNotComputed = false
# Whether existing measurements should be removed.
# default: true
civet.removeMeasurements = true
# Additional components to run
# Creates statistics about date formats and types.
statistics.dates = false
# Creates statistics about languages.
statistics.languages = false
# Creates statistics about themes (categories).
statistics.themes = false
# Creates statistics about the languages of titles.
statistics.titleLanguages = false