diff --git a/README.md b/README.md index 9651e7c56..bd80c3b0f 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,10 @@ This code is a fork of work initially made public by InfluxDB at https://github. Current databases supported: -+ TimescaleDB -+ MongoDB -+ InfluxDB -+ Cassandra ++ TimescaleDB [(supplemental docs)](docs/timescaledb.md) ++ MongoDB [(supplemental docs)](docs/mongo.md) ++ InfluxDB [(supplemental docs)](docs/influx.md) ++ Cassandra [(supplemental docs)](docs/cassandra.md) ## Overview diff --git a/cmd/tsbs_load_cassandra/main.go b/cmd/tsbs_load_cassandra/main.go index b601a35a2..11736d049 100644 --- a/cmd/tsbs_load_cassandra/main.go +++ b/cmd/tsbs_load_cassandra/main.go @@ -47,7 +47,7 @@ func init() { flag.StringVar(&hosts, "hosts", "localhost:9042", "Comma separated list of Cassandra hosts in a cluster.") flag.IntVar(&replicationFactor, "replication-factor", 1, "Number of nodes that must have a copy of each key.") - flag.StringVar(&consistencyLevel, "consistency-level", "ALL", "Desired write consistency level. See Cassandra consistency documentation. Default: ALL") + flag.StringVar(&consistencyLevel, "consistency", "ALL", "Desired write consistency level. See Cassandra consistency documentation. 
Default: ALL") flag.DurationVar(&writeTimeout, "write-timeout", 10*time.Second, "Write timeout.") flag.Parse() diff --git a/cmd/tsbs_load_influx/main.go b/cmd/tsbs_load_influx/main.go index 417abaed6..f77ff19b4 100644 --- a/cmd/tsbs_load_influx/main.go +++ b/cmd/tsbs_load_influx/main.go @@ -19,7 +19,6 @@ import ( "time" "bitbucket.org/440-labs/tsbs/load" - "github.com/pkg/profile" "github.com/valyala/fasthttp" ) @@ -30,7 +29,6 @@ var ( backoff time.Duration useGzip bool doAbortOnExist bool - memprofile bool consistency string ) @@ -58,7 +56,6 @@ func init() { flag.DurationVar(&backoff, "backoff", time.Second, "Time to sleep between requests when server indicates backpressure is needed.") flag.BoolVar(&useGzip, "gzip", true, "Whether to gzip encode requests (default true).") flag.BoolVar(&doAbortOnExist, "do-abort-on-exist", true, "Whether to abort if the destination database already exists.") - flag.BoolVar(&memprofile, "memprofile", false, "Whether to write a memprofile (file automatically determined).") flag.Parse() @@ -91,10 +88,6 @@ func (b *benchmark) GetProcessor() load.Processor { } func main() { - if memprofile { - p := profile.Start(profile.MemProfile) - defer p.Stop() - } if loader.DoLoad() && loader.DoInit() { daemonURL := daemonURLs[0] // pick first one since it always exists diff --git a/cmd/tsbs_run_queries_cassandra/main.go b/cmd/tsbs_run_queries_cassandra/main.go index ec0e12d89..2402f8180 100644 --- a/cmd/tsbs_run_queries_cassandra/main.go +++ b/cmd/tsbs_run_queries_cassandra/main.go @@ -34,11 +34,10 @@ var ( // Program option vars: var ( - daemonURL string - aggrPlanLabel string - subQueryParallelism int - requestTimeout time.Duration - csiTimeout time.Duration + daemonURL string + aggrPlanLabel string + requestTimeout time.Duration + csiTimeout time.Duration ) // Helpers for choice-like flags: @@ -61,10 +60,9 @@ var ( func init() { runner = query.NewBenchmarkRunner() - flag.StringVar(&daemonURL, "url", "localhost:9042", "Cassandra URL.") + 
flag.StringVar(&daemonURL, "host", "localhost:9042", "Cassandra hostname and port combination.") flag.StringVar(&aggrPlanLabel, "aggregation-plan", "", "Aggregation plan (choices: server, client)") - flag.IntVar(&subQueryParallelism, "subquery-workers", 1, "Number of concurrent subqueries to make (because the client does a scatter+gather operation).") - flag.DurationVar(&requestTimeout, "request-timeout", 1*time.Second, "Maximum request timeout.") + flag.DurationVar(&requestTimeout, "read-timeout", 1*time.Second, "Maximum request timeout.") flag.DurationVar(&csiTimeout, "client-side-index-timeout", 10*time.Second, "Maximum client-side index timeout (only used at initialization).") flag.Parse() diff --git a/docs/cassandra.md b/docs/cassandra.md new file mode 100644 index 000000000..5e45576f6 --- /dev/null +++ b/docs/cassandra.md @@ -0,0 +1,88 @@ +# TSBS Supplemental Guide: Cassandra + +Cassandra is a general column store database. This supplemental guide explains +how the data generated for TSBS is stored, additional flags available when +using the data importer (`tsbs_load_cassandra`), and additional flags +available for the query runner (`tsbs_run_queries_cassandra`). **This +should be read *after* the main README.** + +## Data format + +Data generated by `tsbs_generate_data` for Cassandra is in a "pseudo-CSV" format. +Each reading is a single line made up of comma-separated elements, in +the following order: +* first, the table the reading belongs to (based on data type, e.g., `series_double` for doubles); +* then, the data source (e.g., `cpu` for `cpu-only`); +* then, several elements of the form `