# Copyright (c) 2017 YCSB contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You
# may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License. See accompanying
# LICENSE file.

# Yahoo! Cloud System Benchmark
# Time Series Workload Template: Default Values
#
# File contains all properties that can be set to define a
# YCSB session. All properties are set to their default
# value if one exists. If not, the property is commented
# out. When a property has a finite number of settings,
# the default is enabled and the alternates are shown in
# comments below it.
#
# Use of each property is explained through comments in Client.java,
# CoreWorkload.java, TimeSeriesWorkload.java or on the YCSB wiki page:
# https://github.com/brianfrankcooper/YCSB/wiki/TimeSeriesWorkload
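#
# A minimal example invocation, for orientation (paths and the 'basic'
# echo binding are illustrative; substitute your own database binding):
#
#   bin/ycsb load basic -P workloads/tsworkload_template
#   bin/ycsb run basic -P workloads/tsworkload_template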
# The name of the workload class to use. Always the following.
workload=com.yahoo.ycsb.workloads.TimeSeriesWorkload
# The number of records in the table to be inserted in
# the load phase or the number of records already in the
# table before the run phase. The default is Java's
# Long.MAX_VALUE.
recordcount=1000000
# The number of operations to use during the run phase.
# There is no default setting for operationcount but it
# is required to be set.
operationcount=3000000
# The number of insertions to do, if different from recordcount.
# Used with insertstart to grow an existing table.
#insertcount=
# ..::NOTE::.. This is different from the CoreWorkload!
# The starting timestamp of a run as a Unix Epoch numeral in the
# unit set in 'timestampunits'. This determines the first timestamp
# used when writing or querying, as well as how many interval offsets
# (based on 'timestampinterval') are generated.
#insertstart=
# The units represented by the 'insertstart' timestamp as well as
# durations such as 'timestampinterval', 'querytimespan', etc.
# For values, see https://docs.oracle.com/javase/7/docs/api/java/util/concurrent/TimeUnit.html
# Note that only seconds through nanoseconds are supported.
timestampunits=SECONDS
# The amount of time between each value in every time series in
# the units of 'timestampunits'.
timestampinterval=60
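#
# Worked example (values are illustrative, not defaults): with
# timestampunits=SECONDS, timestampinterval=60 and a hypothetical
# insertstart=1500000000, consecutive data points in a single time
# series would carry the timestamps 1500000000, 1500000060,
# 1500000120, and so on.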
# ..::NOTE::.. This is different from the CoreWorkload!
# Represents the number of unique "metrics" or "keys" for time series.
# E.g. "sys.cpu" may be a single field or "metric" while there may be many
# time series sharing that key (perhaps a host tag with "web01" and "web02"
# as options).
fieldcount=16
# The number of characters in the "metric" or "key".
fieldlength=8
# --- TODO ---?
# The distribution used to choose the length of a field
fieldlengthdistribution=constant
#fieldlengthdistribution=uniform
#fieldlengthdistribution=zipfian
# The number of unique tag combinations for each time series. E.g.
# if this value is 4, each record will have a key and 4 tag pairs
# such as A=A, B=A, C=A, D=A.
tagcount=4
# The cardinality (number of unique values) of each tag key for
# every "metric" or field as a comma separated list. Each value must
# be a number from 1 to Java's Integer.MAX_VALUE and there must be
# 'tagcount' values. If more values than 'tagcount' are given, the
# extras are ignored; if fewer are given, 1 is substituted for the
# missing cardinalities.
tagcardinality=1,2,4,8
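#
# Worked example (assuming each unique series is one combination of tag
# values per key): tagcardinality=1,2,4,8 yields 1*2*4*8 = 64 series per
# "metric", i.e. 16 * 64 = 1024 series total with fieldcount=16.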
# The length of each tag key in characters.
tagkeylength=8
# The length of each tag value in characters.
tagvaluelength=8
# The character separating tag keys from tag values when reads, deletes
# or scans are executed against a database. The default is the equals sign
# so a field passed in a read to a DB may look like 'AA=AB'.
tagpairdelimiter==
# The delimiter between keys and tags when a delete is passed to the DB.
# E.g. if there was a key and a field, the request key would look like:
# 'AA:AA=AB'
deletedelimiter=:
# Whether or not to randomize the timestamp order when performing inserts
# and updates against a DB. By default all writes are performed with
# timestamps moving linearly forward in time once all time series for a
# given key have been written.
randomwritetimestamporder=false
# Whether or not to randomly shuffle the time series order when writing.
# This will shuffle the keys, tag keys and tag values.
# ************************************************************************
# WARNING - When this is enabled, reads and scans will likely return many
# empty results as invalid tag combinations will be chosen. Likewise
# this setting is INCOMPATIBLE with data integrity checks.
# ************************************************************************
randomtimeseriesorder=false
# The type of numerical data generated for each data point. The values are
# 64 bit signed integers, double precision floating point values or a
# random mix. When 'dataintegrity' is enabled, this setting is ignored
# and values are forced to 64 bit signed integers.
#valuetype=integers
valuetype=floats
#valuetype=mixed
# A value from 0 to 0.999999 representing how sparse each time series
# should be. The higher this value, the greater the time interval between
# values in a single series. For example, if sparsity is 0 and there are
# 10 time series with a 'timestampinterval' of 60 seconds with a total
# time range of 10 intervals, you would see 100 values written, one per
# timestamp interval per time series. If the sparsity is 0.50 then there
# would be only about 50 values written so some time series would have
# missing values at each interval.
sparsity=0.00
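#
# Rough rule of thumb (an approximation, not an exact contract):
#   values written ~= (1 - sparsity) * number_of_series * number_of_intervals
# which reproduces the example above: (1 - 0.50) * 10 * 10 = 50.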
# The percentage of time series that are "lagging" behind the current
# timestamp of the writer. This is used to mimic a common behavior where
# most sources (agents, sensors, etc.) are writing data in sync (same
# timestamp) but a subset are running behind due to buffering, latency
# issues, etc.
delayedSeries=0.10
# The maximum amount of delay for delayed series in interval counts. The
# actual delay is chosen based on a modulo of the series index.
delayedIntervals=5
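#
# Illustrative example: with delayedSeries=0.10 and delayedIntervals=5,
# roughly 1 in 10 series lags the writer's current timestamp by 1 to 5
# intervals, the exact lag depending on the series index modulo 5.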
# The fixed or maximum amount of time added to the start time of a
# read or scan operation to generate a query over a range of time
# instead of a single timestamp. Units are shared with 'timestampunits'.
# For example if the value is set to 3600 seconds (1 hour) then
# each read would pick a random start timestamp based on the
# 'insertstart' value and number of intervals, then add 3600 seconds
# to create the end time of the query. If this value is 0 then reads
# will only provide a single timestamp.
# WARNING: Cannot be used with 'dataintegrity'.
querytimespan=0
# Whether or not reads should choose a random time span (aligned to
# the 'timestampinterval' value) for each read or scan request starting
# at 0 and reaching 'querytimespan' as the max.
queryrandomtimespan=false
# A delimiter character used to separate the start and end timestamps
# of a read query when 'querytimespan' is enabled.
querytimespandelimiter=,
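#
# Illustrative example (the exact wire format is binding dependent; see
# TimeSeriesWorkload.java): with querytimespan=3600, SECONDS units and
# the default delimiter, a read's time range might be rendered as
# '1500000000,1500003600' (start,end).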
# A unique key given to read, scan and delete operations when the
# operation should perform a group-by (multi-series aggregation) on one
# or more tags. If 'groupbyfunction' is set, this key will be given with
# the configured function.
groupbykey=YCSBGB
# A function name (e.g. 'sum', 'max' or 'avg') passed during reads,
# scans and deletes to cause the database to perform a group-by
# operation on one or more tags. If this value is empty or null
# (default), group-by operations are not performed.
#groupbyfunction=
# A comma separated list of 0s or 1s to denote which of the tag keys
# should be grouped during group-by operations. The number of values
# must match the number of tags in 'tagcount'.
#groupbykeys=0,0,1,1
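#
# Hypothetical example configuration: ask the database to sum series
# together, grouping on the last two tag keys only:
#
#   groupbyfunction=sum
#   groupbykeys=0,0,1,1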
# A unique key given to read and scan operations when the operation
# should downsample the results of a query into lower resolution
# data. If 'downsamplingfunction' is set, this key will be given with
# the configured function.
downsamplingkey=YCSBDS
# A function name (e.g. 'sum', 'max' or 'avg') passed during reads and
# scans to cause the database to perform a downsampling operation
# returning lower resolution data. If this value is empty or null
# (default), downsampling is not performed.
#downsamplingfunction=
# The time interval to downsample the raw data into. Shares the same
# units as 'timestampinterval'. This value must be greater than
# 'timestampinterval'. E.g. if the timestamp interval for raw data is
# 60 seconds, the downsampling interval could be 3600 seconds to roll
# the data up into 1 hour buckets.
#downsamplinginterval=
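#
# Example configuration, following the description above: roll 60 second
# raw data up into 1 hour average buckets:
#
#   downsamplingfunction=avg
#   downsamplinginterval=3600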
# What proportion of operations are reads
readproportion=0.10
# What proportion of operations are updates
updateproportion=0.00
# What proportion of operations are inserts
insertproportion=0.90
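#
# Note: these three proportions are conventionally chosen to sum to 1.0;
# the defaults above give 0.10 + 0.00 + 0.90 = 1.00.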
# The distribution of requests across the keyspace
requestdistribution=zipfian
#requestdistribution=uniform
#requestdistribution=latest
# The name of the database table to run queries against
table=usertable
# Whether or not data should be validated during writes and reads. If
# set then the data type is always a 64 bit signed integer and is the
# hash code of the key, timestamp and tags.
dataintegrity=false
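#
# Illustrative sketch only (the exact function is defined in
# TimeSeriesWorkload.java): the stored value is derived along the lines of
#   value = hash(key, timestamp, tag pairs)   (a 64 bit signed integer)
# so it can be recomputed and compared on every read.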
# How the latency measurements are presented
measurementtype=histogram
#measurementtype=timeseries
#measurementtype=raw
# When measurementtype is set to raw, measurements will be output
# as RAW datapoints in the following CSV format:
# "operation, timestamp of the measurement, latency in us"
#
# Raw datapoints are collected in-memory while the test is running. Each
# data point consumes about 50 bytes (including java object overhead).
# For a typical run of 1 million to 10 million operations, this should
# fit into memory most of the time. If you plan to do 100s of millions of
# operations per run, consider provisioning a machine with larger RAM when using
# the RAW measurement type, or split the run into multiple runs.
#
# Optionally, you can specify an output file to save raw datapoints.
# Otherwise, raw datapoints will be written to stdout.
# The output file will be appended to if it already exists, otherwise
# a new output file will be created.
#measurement.raw.output_file = /tmp/your_output_file_for_this_run
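#
# Illustrative raw output lines in the format above (numbers invented):
#   READ,1500000123456,312
#   INSERT,1500000123789,145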
# JVM Reporting.
#
# Measure JVM information over time including GC counts, max and min memory
# used, max and min thread counts, max and min system load and others. This
# setting must be enabled in conjunction with the "-s" flag to run the status
# thread. Every "status.interval", the status thread will capture JVM
# statistics and record the results. At the end of the run, the maximums
# and minimums will be recorded.
# measurement.trackjvm = false
# The range of latencies to track in the histogram (milliseconds)
histogram.buckets=1000
# Granularity for time series (in milliseconds)
timeseries.granularity=1000
# Latency reporting.
#
# YCSB records latency of failed operations separately from successful ones.
# Latency of all OK operations will be reported under their operation name,
# such as [READ], [UPDATE], etc.
#
# For failed operations:
# By default we don't track latency numbers per specific error status.
# We just report the latency of all failed operations under one
# measurement name such as [READ-FAILED]. Optionally, the user can
# configure YCSB to either:
# 1. Record and report latency for each and every error status code by
# setting reportLatencyForEachError to true, or
# 2. Record and report latency for a select set of error status codes by
# providing a CSV list of Status codes via the "latencytrackederrors"
# property.
# reportlatencyforeacherror=false
# latencytrackederrors="<comma separated strings of error codes>"
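# Hypothetical example: track latency separately for two of YCSB's
# Status codes (see com.yahoo.ycsb.Status for the full set):
# latencytrackederrors=ERROR,NOT_FOUND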