From 3869237825a5c2a45c436dc0c5684fd5fd31f878 Mon Sep 17 00:00:00 2001 From: Rob Kiefer Date: Mon, 2 Jul 2018 16:47:50 -0400 Subject: [PATCH] Add docs/ dir for supplemental doc material Each database should have a supplemental doc explaining important details about how the data is generated, perhaps how it is stored, and additional flags for its client binaries. Also, this removes any unnecessary flags from binaries. --- README.md | 8 +- cmd/tsbs_load_cassandra/main.go | 2 +- cmd/tsbs_load_influx/main.go | 7 - cmd/tsbs_run_queries_cassandra/main.go | 14 +- docs/cassandra.md | 88 ++++++++++++ docs/influx.md | 83 +++++++++++ docs/mongo.md | 80 +++++++++++ docs/timescaledb.md | 182 +++++++++++++++++++++++++ 8 files changed, 444 insertions(+), 20 deletions(-) create mode 100644 docs/cassandra.md create mode 100644 docs/influx.md create mode 100644 docs/mongo.md create mode 100644 docs/timescaledb.md diff --git a/README.md b/README.md index 9651e7c56..bd80c3b0f 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,10 @@ This code is a fork of work initially made public by InfluxDB at https://github. Current databases supported: -+ TimescaleDB -+ MongoDB -+ InfluxDB -+ Cassandra ++ TimescaleDB [(supplemental docs)](docs/timescaledb.md) ++ MongoDB [(supplemental docs)](docs/mongo.md) ++ InfluxDB [(supplemental docs)](docs/influx.md) ++ Cassandra [(supplemental docs)](docs/cassandra.md) ## Overview diff --git a/cmd/tsbs_load_cassandra/main.go b/cmd/tsbs_load_cassandra/main.go index b601a35a2..11736d049 100644 --- a/cmd/tsbs_load_cassandra/main.go +++ b/cmd/tsbs_load_cassandra/main.go @@ -47,7 +47,7 @@ func init() { flag.StringVar(&hosts, "hosts", "localhost:9042", "Comma separated list of Cassandra hosts in a cluster.") flag.IntVar(&replicationFactor, "replication-factor", 1, "Number of nodes that must have a copy of each key.") - flag.StringVar(&consistencyLevel, "consistency-level", "ALL", "Desired write consistency level. See Cassandra consistency documentation. 
Default: ALL") + flag.StringVar(&consistencyLevel, "consistency", "ALL", "Desired write consistency level. See Cassandra consistency documentation. Default: ALL") flag.DurationVar(&writeTimeout, "write-timeout", 10*time.Second, "Write timeout.") flag.Parse() diff --git a/cmd/tsbs_load_influx/main.go b/cmd/tsbs_load_influx/main.go index 417abaed6..f77ff19b4 100644 --- a/cmd/tsbs_load_influx/main.go +++ b/cmd/tsbs_load_influx/main.go @@ -19,7 +19,6 @@ import ( "time" "bitbucket.org/440-labs/tsbs/load" - "github.com/pkg/profile" "github.com/valyala/fasthttp" ) @@ -30,7 +29,6 @@ var ( backoff time.Duration useGzip bool doAbortOnExist bool - memprofile bool consistency string ) @@ -58,7 +56,6 @@ func init() { flag.DurationVar(&backoff, "backoff", time.Second, "Time to sleep between requests when server indicates backpressure is needed.") flag.BoolVar(&useGzip, "gzip", true, "Whether to gzip encode requests (default true).") flag.BoolVar(&doAbortOnExist, "do-abort-on-exist", true, "Whether to abort if the destination database already exists.") - flag.BoolVar(&memprofile, "memprofile", false, "Whether to write a memprofile (file automatically determined).") flag.Parse() @@ -91,10 +88,6 @@ func (b *benchmark) GetProcessor() load.Processor { } func main() { - if memprofile { - p := profile.Start(profile.MemProfile) - defer p.Stop() - } if loader.DoLoad() && loader.DoInit() { daemonURL := daemonURLs[0] // pick first one since it always exists diff --git a/cmd/tsbs_run_queries_cassandra/main.go b/cmd/tsbs_run_queries_cassandra/main.go index ec0e12d89..2402f8180 100644 --- a/cmd/tsbs_run_queries_cassandra/main.go +++ b/cmd/tsbs_run_queries_cassandra/main.go @@ -34,11 +34,10 @@ var ( // Program option vars: var ( - daemonURL string - aggrPlanLabel string - subQueryParallelism int - requestTimeout time.Duration - csiTimeout time.Duration + daemonURL string + aggrPlanLabel string + requestTimeout time.Duration + csiTimeout time.Duration ) // Helpers for choice-like flags: @@ 
-61,10 +60,9 @@ var ( func init() { runner = query.NewBenchmarkRunner() - flag.StringVar(&daemonURL, "url", "localhost:9042", "Cassandra URL.") + flag.StringVar(&daemonURL, "host", "localhost:9042", "Cassandra hostname and port combination.") flag.StringVar(&aggrPlanLabel, "aggregation-plan", "", "Aggregation plan (choices: server, client)") - flag.IntVar(&subQueryParallelism, "subquery-workers", 1, "Number of concurrent subqueries to make (because the client does a scatter+gather operation).") - flag.DurationVar(&requestTimeout, "request-timeout", 1*time.Second, "Maximum request timeout.") + flag.DurationVar(&requestTimeout, "read-timeout", 1*time.Second, "Maximum request timeout.") flag.DurationVar(&csiTimeout, "client-side-index-timeout", 10*time.Second, "Maximum client-side index timeout (only used at initialization).") flag.Parse() diff --git a/docs/cassandra.md b/docs/cassandra.md new file mode 100644 index 000000000..5e45576f6 --- /dev/null +++ b/docs/cassandra.md @@ -0,0 +1,88 @@ +# TSBS Supplemental Guide: Cassandra + +Cassandra is a general column store database. This supplemental guide explains +how the data generated for TSBS is stored, additional flags available when +using the data importer (`tsbs_load_cassandra`), and additional flags +available for the query runner (`tsbs_run_queries_cassandra`). **This +should be read *after* the main README.** + +## Data format + +Data generated by `tsbs_generate_data` for Cassandra is a "pseudo-CSV" format. +Each reading is a single line of comma-separated elements, in +the following order: +* first, the table the reading belongs to (based on data type, e.g., `series_double` for doubles); +* then, the data source (e.g., `cpu` for `cpu-only`); +* then, several elements of the form `