default.properties

# Default configuration. Please create a copy of this file instead of editing.
# At least, set io.input and io.outputDirectory.


# Line will be added to info file.
info = OPAL


# Input file or directory. Multiple entries can be separated by '|'.
io.input = 

# Optional: Named graph of N-quads input files. Has to be the same in every input.
io.inputGraph = 


# Output directory.
io.outputDirectory = 

# Output title, used as prefix for files.
io.outputTitle = opal

# Output file format.
io.outputFormat = ttl 

# Output dataset models per file.
io.outputSize = 10000

# Writing of RDF files
io.outputWrite = true


# Elasticsearch writing
io.elasticsearch.write = false
io.elasticsearch.hostname = localhost 
io.elasticsearch.port = 9200 
io.elasticsearch.scheme = http 
io.elasticsearch.index =  opal
 

# OPAL Components to run

# Data cleaning component.
# https://github.com/projekt-opal/catfish
run.catfish = true

# Language Detection and Geographic data.
# https://github.com/projekt-opal/metadata-refinement
run.languageDetection = true
run.geoData = true

# Metadata quality component.
# https://github.com/projekt-opal/civet
run.civet = true


# Adds an additional file containing labels.
add.labels = true


# Catfish configuration

# Remove blank nodes, which are not subject of triples.
catfish.cleanEmptyBlankNodes = true

# Removes literals, which are not empty, german or english
catfish.removeNonDeEnEmptyTitles = true

# Removes datasets, which do not have a german and an english title.
# Additionally, non-german and non-english titles and descriptions are removed.
# Note: Used for EDP
catfish.removeNonDeEn = false

# Remove triples with literals as object, which contain no value or unreadable.
# And also extract Language Tag and DataType if it is mistakenly inside the string.
catfish.cleanLiterals = true

# Check dct:format and dcat:mediaType for values and create new triples.
catfish.cleanFormats = true

# Rewrites date formats.
catfish.equalizeDateFormats = true

# If true, the first input is used to set catfish.replaceUrisCatalog.
# If false, catfish.replaceUrisCatalog value itself is used.
catfish.replaceUrisCatalogByFilename = true 

# Rewrites URIs of datasets and distributions.
# Has to be same for every dataset in batch process.
# If is empty, URIs are not rewritten.
# Possible values: europeandataportal, govdata, mcloud, mdm
catfish.replaceUrisCatalog = 


# Civet configuration

# If long running metrics should be included.
# default: false
civet.includeLongRunning = false

# If it should be logged, if a measurement could not be computed.
# default: true
civet.logIfNotComputed = false

# If existing measurements should be removed.
# default: true
civet.removeMeasurements = true


# Additional components to run

# Creates statistics about date formats and types
statistics.dates = false

# Creates statistics about languages
statistics.languages = false

# Creates statistics about themes (categories)
statistics.themes = false

# Creates statistics about languages of titles
statistics.titleLanguages = false