# Copyright (c) 2017 YCSB contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You
# may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License. See accompanying
# LICENSE file.

# Yahoo! Cloud System Benchmark
# Time Series Workload Template: Default Values
#
# File contains all properties that can be set to define a
# YCSB session. All properties are set to their default
# value if one exists. If not, the property is commented
# out. When a property has a finite number of settings,
# the default is enabled and the alternates are shown in
# comments below it.
#
# Use of each property is explained through comments in Client.java,
# CoreWorkload.java, TimeSeriesWorkload.java or on the YCSB wiki page:
# https://github.com/brianfrankcooper/YCSB/wiki/TimeSeriesWorkload
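#
# A minimal example invocation, for orientation (paths and the 'basic'
# echo binding are illustrative; substitute your own database binding):
#
#   bin/ycsb load basic -P workloads/tsworkload_template
#   bin/ycsb run basic -P workloads/tsworkload_template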
# The name of the workload class to use. Always the following.
workload=com.yahoo.ycsb.workloads.TimeSeriesWorkload
# The number of records in the table to be inserted in
# the load phase or the number of records already in the
# table before the run phase. The default is Java's
# Long.MAX_VALUE.
recordcount=1000000
# The number of operations to use during the run phase.
# There is no default setting for operationcount but it
# is required to be set.
operationcount=3000000
# The number of insertions to do, if different from recordcount.
# Used with insertstart to grow an existing table.
#insertcount=
# ..::NOTE::.. This is different from the CoreWorkload!
# The starting timestamp of a run as a Unix Epoch numeral in the
# unit set in 'timestampunits'. This determines the first timestamp
# used when writing or querying, as well as how many interval offsets
# (based on 'timestampinterval') are generated.
#insertstart=
# The units represented by the 'insertstart' timestamp as well as
# durations such as 'timestampinterval', 'querytimespan', etc.
# For values, see https://docs.oracle.com/javase/7/docs/api/java/util/concurrent/TimeUnit.html
# Note that only seconds through nanoseconds are supported.
timestampunits=SECONDS
# The amount of time between each value in every time series in
# the units of 'timestampunits'.
timestampinterval=60
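#
# Worked example (values are illustrative, not defaults): with
# timestampunits=SECONDS, timestampinterval=60 and a hypothetical
# insertstart=1500000000, consecutive data points in a single time
# series would carry the timestamps 1500000000, 1500000060,
# 1500000120, and so on.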
# ..::NOTE::.. This is different from the CoreWorkload!
# Represents the number of unique "metrics" or "keys" for time series.
# E.g. "sys.cpu" may be a single field or "metric" while there may be many
# time series sharing that key (perhaps a host tag with "web01" and "web02"
# as options).
fieldcount=16
# The number of characters in the "metric" or "key".
fieldlength=8
# --- TODO ---?
# The distribution used to choose the length of a field
fieldlengthdistribution=constant
#fieldlengthdistribution=uniform
#fieldlengthdistribution=zipfian
# The number of unique tag combinations for each time series. E.g.
# if this value is 4, each record will have a key and 4 tag pairs
# such as A=A, B=A, C=A, D=A.
tagcount=4
# The cardinality (number of unique values) of each tag key for
# every "metric" or field as a comma separated list. Each value must
# be a number from 1 to Java's Integer.MAX_VALUE and there must be
# 'tagcount' values. If more values than 'tagcount' are given, the
# extras are ignored; if fewer are given, 1 is substituted for the
# missing cardinalities.
tagcardinality=1,2,4,8
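#
# Worked example (assuming each unique series is one combination of tag
# values per key): tagcardinality=1,2,4,8 yields 1*2*4*8 = 64 series per
# "metric", i.e. 16 * 64 = 1024 series total with fieldcount=16.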
# The length of each tag key in characters.
tagkeylength=8
# The length of each tag value in characters.
tagvaluelength=8
# The character separating tag keys from tag values when reads, deletes
# or scans are executed against a database. The default is the equals sign
# so a field passed in a read to a DB may look like 'AA=AB'.
tagpairdelimiter==
# The delimiter between keys and tags when a delete is passed to the DB.
# E.g. if there was a key and a field, the request key would look like:
# 'AA:AA=AB'
deletedelimiter=:
# Whether or not to randomize the timestamp order when performing inserts
# and updates against a DB. By default all writes are performed with
# timestamps moving linearly forward in time once all time series for a
# given key have been written.
randomwritetimestamporder=false
# Whether or not to randomly shuffle the time series order when writing.
# This will shuffle the keys, tag keys and tag values.
# ************************************************************************
# WARNING - When this is enabled, reads and scans will likely return many
# empty results as invalid tag combinations will be chosen. Likewise
# this setting is INCOMPATIBLE with data integrity checks.
# ************************************************************************
randomtimeseriesorder=false
# The type of numerical data generated for each data point. The values are
# 64 bit signed integers, double precision floating point values or a
# random mix. When 'dataintegrity' is enabled, this setting is ignored
# and values are forced to 64 bit signed integers.
#valuetype=integers
valuetype=floats
#valuetype=mixed
# A value from 0 to 0.999999 representing how sparse each time series
# should be. The higher this value, the greater the time interval between
# values in a single series. For example, if sparsity is 0 and there are
# 10 time series with a 'timestampinterval' of 60 seconds with a total
# time range of 10 intervals, you would see 100 values written, one per
# timestamp interval per time series. If the sparsity is 0.50 then there
# would be only about 50 values written so some time series would have
# missing values at each interval.
sparsity=0.00
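#
# Rough rule of thumb (an approximation, not an exact contract):
#   values written ~= (1 - sparsity) * number_of_series * number_of_intervals
# which reproduces the example above: (1 - 0.50) * 10 * 10 = 50.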
# The percentage of time series that are "lagging" behind the current
# timestamp of the writer. This is used to mimic a common behavior where
# most sources (agents, sensors, etc.) are writing data in sync (same
# timestamp) but a subset are running behind due to buffering, latency
# issues, etc.
delayedSeries=0.10
# The maximum amount of delay for delayed series in interval counts. The
# actual delay is chosen based on a modulo of the series index.
delayedIntervals=5
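#
# Illustrative example: with delayedSeries=0.10 and delayedIntervals=5,
# roughly 1 in 10 series lags the writer's current timestamp by 1 to 5
# intervals, the exact lag depending on the series index modulo 5.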
# The fixed or maximum amount of time added to the start time of a
# read or scan operation to generate a query over a range of time
# instead of a single timestamp. Units are shared with 'timestampunits'.
# For example if the value is set to 3600 seconds (1 hour) then
# each read would pick a random start timestamp based on the
# 'insertstart' value and number of intervals, then add 3600 seconds
# to create the end time of the query. If this value is 0 then reads
# will only provide a single timestamp.
# WARNING: Cannot be used with 'dataintegrity'.
querytimespan=0
# Whether or not reads should choose a random time span (aligned to
# the 'timestampinterval' value) for each read or scan request starting
# at 0 and reaching 'querytimespan' as the max.
queryrandomtimespan=false
# A delimiter character used to separate the start and end timestamps
# of a read query when 'querytimespan' is enabled.
querytimespandelimiter=,
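#
# Illustrative example (the exact wire format is binding dependent; see
# TimeSeriesWorkload.java): with querytimespan=3600, SECONDS units and
# the default delimiter, a read's time range might be rendered as
# '1500000000,1500003600' (start,end).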
# A unique key given to read, scan and delete operations when the
# operation should perform a group-by (multi-series aggregation) on one
# or more tags. If 'groupbyfunction' is set, this key will be given with
# the configured function.
groupbykey=YCSBGB
# A function name (e.g. 'sum', 'max' or 'avg') passed during reads,
# scans and deletes to cause the database to perform a group-by
# operation on one or more tags. If this value is empty or null
# (default), group-by operations are not performed.
#groupbyfunction=
# A comma separated list of 0s or 1s to denote which of the tag keys
# should be grouped during group-by operations. The number of values
# must match the number of tags in 'tagcount'.
#groupbykeys=0,0,1,1
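#
# Hypothetical example configuration: ask the database to sum series
# together, grouping on the last two tag keys only:
#
#   groupbyfunction=sum
#   groupbykeys=0,0,1,1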
# A unique key given to read and scan operations when the operation
# should downsample the results of a query into lower resolution
# data. If 'downsamplingfunction' is set, this key will be given with
# the configured function.
downsamplingkey=YCSBDS
# A function name (e.g. 'sum', 'max' or 'avg') passed during reads and
# scans to cause the database to perform a downsampling operation
# returning lower resolution data. If this value is empty or null
# (default), downsampling is not performed.
#downsamplingfunction=
# The time interval to downsample the raw data into. Shares the same
# units as 'timestampinterval'. This value must be greater than
# 'timestampinterval'. E.g. if the timestamp interval for raw data is
# 60 seconds, the downsampling interval could be 3600 seconds to roll
# the data up into 1 hour buckets.
#downsamplinginterval=
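#
# Example configuration, following the description above: roll 60 second
# raw data up into 1 hour average buckets:
#
#   downsamplingfunction=avg
#   downsamplinginterval=3600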
# What proportion of operations are reads
readproportion=0.10
# What proportion of operations are updates
updateproportion=0.00
# What proportion of operations are inserts
insertproportion=0.90
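#
# Note: these three proportions are conventionally chosen to sum to 1.0;
# the defaults above give 0.10 + 0.00 + 0.90 = 1.00.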
# The distribution of requests across the keyspace
requestdistribution=zipfian
#requestdistribution=uniform
#requestdistribution=latest
# The name of the database table to run queries against
table=usertable
# Whether or not data should be validated during writes and reads. If
# set then the data type is always a 64 bit signed integer and is the
# hash code of the key, timestamp and tags.
dataintegrity=false
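#
# Illustrative sketch only (the exact function is defined in
# TimeSeriesWorkload.java): the stored value is derived along the lines of
#   value = hash(key, timestamp, tag pairs)   (a 64 bit signed integer)
# so it can be recomputed and compared on every read.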
# How the latency measurements are presented
measurementtype=histogram
#measurementtype=timeseries
#measurementtype=raw
# When measurementtype is set to raw, measurements will be output
# as RAW datapoints in the following CSV format:
# "operation, timestamp of the measurement, latency in us"
#
# Raw datapoints are collected in-memory while the test is running. Each
# data point consumes about 50 bytes (including java object overhead).
# For a typical run of 1 million to 10 million operations, this should
# fit into memory most of the time. If you plan to do 100s of millions of
# operations per run, consider provisioning a machine with larger RAM when using
# the RAW measurement type, or split the run into multiple runs.
#
# Optionally, you can specify an output file to save raw datapoints.
# Otherwise, raw datapoints will be written to stdout.
# The output file will be appended to if it already exists, otherwise
# a new output file will be created.
#measurement.raw.output_file = /tmp/your_output_file_for_this_run
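#
# Illustrative raw output lines in the format above (numbers invented):
#   READ,1500000123456,312
#   INSERT,1500000123789,145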
# JVM Reporting.
#
# Measure JVM information over time including GC counts, max and min memory
# used, max and min thread counts, max and min system load and others. This
# setting must be enabled in conjunction with the "-s" flag to run the status
# thread. Every "status.interval", the status thread will capture JVM
# statistics and record the results. At the end of the run, the maximums
# and minimums will be recorded.
# measurement.trackjvm = false
# The range of latencies to track in the histogram (milliseconds)
histogram.buckets=1000
# Granularity for time series (in milliseconds)
timeseries.granularity=1000
# Latency reporting.
#
# YCSB records latency of failed operations separately from successful ones.
# Latency of all OK operations will be reported under their operation name,
# such as [READ], [UPDATE], etc.
#
# For failed operations:
# By default we don't track latency numbers per specific error status.
# We just report the latency of all failed operations under one
# measurement name such as [READ-FAILED]. Optionally, the user can
# configure YCSB to either:
# 1. Record and report latency for each and every error status code by
# setting reportLatencyForEachError to true, or
# 2. Record and report latency for a select set of error status codes by
# providing a CSV list of Status codes via the "latencytrackederrors"
# property.
# reportlatencyforeacherror=false
# latencytrackederrors="<comma separated strings of error codes>"
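# Hypothetical example: track latency separately for two of YCSB's
# Status codes (see com.yahoo.ycsb.Status for the full set):
# latencytrackederrors=ERROR,NOT_FOUND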