diff --git a/README.md b/README.md
index 9651e7c56..bd80c3b0f 100644
--- a/README.md
+++ b/README.md
@@ -5,10 +5,10 @@ This code is a fork of work initially made public by InfluxDB at https://github.
 
 Current databases supported:
 
-+ TimescaleDB
-+ MongoDB
-+ InfluxDB
-+ Cassandra
++ TimescaleDB [(supplemental docs)](docs/timescaledb.md)
++ MongoDB [(supplemental docs)](docs/mongo.md)
++ InfluxDB [(supplemental docs)](docs/influx.md)
++ Cassandra [(supplemental docs)](docs/cassandra.md)
 
 ## Overview
 
diff --git a/cmd/tsbs_load_cassandra/main.go b/cmd/tsbs_load_cassandra/main.go
index b601a35a2..11736d049 100644
--- a/cmd/tsbs_load_cassandra/main.go
+++ b/cmd/tsbs_load_cassandra/main.go
@@ -47,7 +47,7 @@ func init() {
 	flag.StringVar(&hosts, "hosts", "localhost:9042", "Comma separated list of Cassandra hosts in a cluster.")
 
 	flag.IntVar(&replicationFactor, "replication-factor", 1, "Number of nodes that must have a copy of each key.")
-	flag.StringVar(&consistencyLevel, "consistency-level", "ALL", "Desired write consistency level. See Cassandra consistency documentation. Default: ALL")
+	flag.StringVar(&consistencyLevel, "consistency", "ALL", "Desired write consistency level. See Cassandra consistency documentation. Default: ALL")
 	flag.DurationVar(&writeTimeout, "write-timeout", 10*time.Second, "Write timeout.")
 
 	flag.Parse()
diff --git a/cmd/tsbs_load_influx/main.go b/cmd/tsbs_load_influx/main.go
index 417abaed6..f77ff19b4 100644
--- a/cmd/tsbs_load_influx/main.go
+++ b/cmd/tsbs_load_influx/main.go
@@ -19,7 +19,6 @@ import (
 	"time"
 
 	"bitbucket.org/440-labs/tsbs/load"
-	"github.com/pkg/profile"
 	"github.com/valyala/fasthttp"
 )
 
@@ -30,7 +29,6 @@ var (
 	backoff           time.Duration
 	useGzip           bool
 	doAbortOnExist    bool
-	memprofile        bool
 	consistency       string
 )
 
@@ -58,7 +56,6 @@ func init() {
 	flag.DurationVar(&backoff, "backoff", time.Second, "Time to sleep between requests when server indicates backpressure is needed.")
 	flag.BoolVar(&useGzip, "gzip", true, "Whether to gzip encode requests (default true).")
 	flag.BoolVar(&doAbortOnExist, "do-abort-on-exist", true, "Whether to abort if the destination database already exists.")
-	flag.BoolVar(&memprofile, "memprofile", false, "Whether to write a memprofile (file automatically determined).")
 
 	flag.Parse()
 
@@ -91,10 +88,6 @@ func (b *benchmark) GetProcessor() load.Processor {
 }
 
 func main() {
-	if memprofile {
-		p := profile.Start(profile.MemProfile)
-		defer p.Stop()
-	}
 	if loader.DoLoad() && loader.DoInit() {
 		daemonURL := daemonURLs[0] // pick first one since it always exists
 
diff --git a/cmd/tsbs_run_queries_cassandra/main.go b/cmd/tsbs_run_queries_cassandra/main.go
index ec0e12d89..2402f8180 100644
--- a/cmd/tsbs_run_queries_cassandra/main.go
+++ b/cmd/tsbs_run_queries_cassandra/main.go
@@ -34,11 +34,10 @@ var (
 
 // Program option vars:
 var (
-	daemonURL           string
-	aggrPlanLabel       string
-	subQueryParallelism int
-	requestTimeout      time.Duration
-	csiTimeout          time.Duration
+	daemonURL      string
+	aggrPlanLabel  string
+	requestTimeout time.Duration
+	csiTimeout     time.Duration
 )
 
 // Helpers for choice-like flags:
@@ -61,10 +60,9 @@ var (
 func init() {
 	runner = query.NewBenchmarkRunner()
 
-	flag.StringVar(&daemonURL, "url", "localhost:9042", "Cassandra URL.")
+	flag.StringVar(&daemonURL, "host", "localhost:9042", "Cassandra hostname and port combination.")
 	flag.StringVar(&aggrPlanLabel, "aggregation-plan", "", "Aggregation plan (choices: server, client)")
-	flag.IntVar(&subQueryParallelism, "subquery-workers", 1, "Number of concurrent subqueries to make (because the client does a scatter+gather operation).")
-	flag.DurationVar(&requestTimeout, "request-timeout", 1*time.Second, "Maximum request timeout.")
+	flag.DurationVar(&requestTimeout, "read-timeout", 1*time.Second, "Maximum request timeout.")
 	flag.DurationVar(&csiTimeout, "client-side-index-timeout", 10*time.Second, "Maximum client-side index timeout (only used at initialization).")
 
 	flag.Parse()
diff --git a/docs/cassandra.md b/docs/cassandra.md
new file mode 100644
index 000000000..5e45576f6
--- /dev/null
+++ b/docs/cassandra.md
@@ -0,0 +1,88 @@
+# TSBS Supplemental Guide: Cassandra
+
+Cassandra is a general column store database. This supplemental guide explains
+how the data generated for TSBS is stored, additional flags available when
+using the data importer (`tsbs_load_cassandra`), and additional flags
+available for the query runner (`tsbs_run_queries_cassandra`). **This
+should be read *after* the main README.**
+
+## Data format
+
+Data generated by `tsbs_generate_data` for Cassandra is a "pseudo-CSV" format.
+Each reading is a single line of comma-separated elements, in the following
+order:
+* first, the table the reading belongs to (based on data type, e.g., `series_double` for doubles);
+* then, the data source (e.g., `cpu` for `cpu-only`);
+* then, several elements of the form `<label>=<value>` for tags;
+* then, the field label;
+* then, the date of the reading in YYYY-MM-DD form;
+* then, the timestamp in nanoseconds;
+* and finally, the reading itself.
+
+An example from `cpu-only`:
+```text
+series_double,cpu,hostname=host_0,region=eu-west-1,datacenter=eu-west-1b,rack=67,os=Ubuntu16.10,arch=x86,team=NYC,service=7,service_version=0,service_environment=production,usage_guest_nice,2016-01-01,1451606400000000000,38.2431182911542820
+```
+
+When stored, the elements starting with the data source (e.g. `cpu`) through
+the date of the reading are concatenated to serve as the primary key.
+
+---
+
+## `tsbs_load_cassandra` Additional Flags
+
+### Database related
+
+#### `-consistency` (type: `string`, default: `ALL`)
+
+Consistency level for writes to the database. Options are `ALL`, `ANY`, `ONE`,
+`TWO`, `THREE`, or `QUORUM`. Only applies to a multi-node cluster.
+
+#### `-hosts` (type: `string`, default: `localhost:9042`)
+
+Comma-separated list of hostname and port combinations for nodes in the cluster.
+
+#### `-replication-factor` (type: `int`, default: `1`)
+
+Level of replication for each write, i.e., number of nodes to store the
+data on. Only applies to a multi-node cluster.
+
+#### `-write-timeout` (type: `duration`, default: `10s`)
+
+Length of the timeout for writes.
+It is expressed as a Golang time.Duration string, meaning a number followed
+by a unit abbreviation (s = seconds,
+m = minutes, h = hours), e.g., the default `10s` is ten seconds.
+
+
+---
+
+## `tsbs_run_queries_cassandra` Additional Flags
+
+### Database related
+
+#### `-aggregation-plan` (type: `string`, default: `client`)
+
+Method for doing aggregations in queries. Due to limitations in Cassandra's
+SQL-like language CQL, aggregations can be painful and slow if done on the
+server itself. Therefore the default is `client` (with the other valid option
+being `server`), where the client Go program handles the aggregation.
+
+#### `-client-side-index-timeout` (type: `duration`, default: `10s`)
+
+Length of the timeout when setting up the client side index, a data structure
+used to speed up queries by storing the tagsets/primary keys in memory on the
+client. It is expressed as a Golang time.Duration string, meaning a number followed by a unit abbreviation (s = seconds,
+m = minutes, h = hours), e.g., the default `10s` is ten seconds.
+
+#### `-host` (type: `string`, default: `localhost:9042`)
+
+Hostname and port combination of at least one node in the cluster. The library
+used will discover the other nodes for queries.
+
+#### `-read-timeout` (type: `duration`, default: `1s`)
+
+Length of the timeout for reads.
+It is expressed as a Golang time.Duration string, meaning a number followed
+by a unit abbreviation (s = seconds,
+m = minutes, h = hours), e.g., the default `1s` is one second.
diff --git a/docs/influx.md b/docs/influx.md
new file mode 100644
index 000000000..929cd49c9
--- /dev/null
+++ b/docs/influx.md
@@ -0,0 +1,83 @@
+# TSBS Supplemental Guide: InfluxDB
+
+InfluxDB is a purpose-built time-series database written in Go from
+InfluxData. This supplemental guide explains how
+the data generated for TSBS is stored, additional flags available when
+using the data importer (`tsbs_load_influx`), and additional flags
+available for the query runner (`tsbs_run_queries_influx`). **This
+should be read *after* the main README.**
+
+## Data format
+
+Data generated by `tsbs_generate_data` for InfluxDB is serialized in a
+"pseudo-CSV" format. Each reading is composed of a single line where
+the name of the table is the first item, followed by several items of
+tags and fields that are in the format of `<label>=<value>`, and finally
+a space and then the timestamp for the reading.
+
+An example for the `cpu-only` use case:
+```text
+cpu,hostname=host_0,region=eu-central-1,datacenter=eu-central-1b,rack=21,os=Ubuntu15.10,arch=x86,team=SF,service=6,service_version=0,service_environment=test usage_user=58.1317132304976170,usage_system=2.6224297271376256,usage_idle=24.9969495069947882,usage_nice=61.5854484633778867,usage_iowait=22.9481393231639395,usage_irq=63.6499207106198313,usage_softirq=6.4098777048301052,usage_steal=44.8799140503027445,usage_guest=80.5028770761136201,usage_guest_nice=38.2431182911542820 1451606400000000000
+```
+
+---
+
+## `tsbs_load_influx` Additional Flags
+
+### Database related
+
+#### `-consistency` (type: `string`, default: `all`)
+
+Consistency level for writes to the database. Options are `all`, `any`, `one`,
+or `quorum`. Only applies for the clustered version.
+
+#### `-do-abort-on-exist` (type: `boolean`, default: `true`)
+
+Whether to abort the benchmark if the named database already exists. This is to
+prevent accidentally overwriting a database of the same name or a previous run
+of the benchmark.
+
+#### `-replication-factor` (type: `int`, default: `1`)
+
+Level of replication for each write, i.e., number of nodes to store the
+data on. Only applies for the clustered version.
+
+#### `-urls` (type: `string`, default: `http://localhost:8086`)
+
+Comma-separated list of URLs to connect to for inserting data. Workers will be
+distributed in a round robin fashion across the URLs.
+
+### Miscellaneous
+
+#### `-backoff` (type: `duration`, default: `1s`)
+
+The amount of time per retry attempt when the server says it is too busy. A
+longer backoff will potentially reduce write performance by waiting too long to
+retry, leaving the system idle. It is expressed as a Golang time.Duration
+string, meaning a number followed by a unit abbreviation (s = seconds,
+m = minutes, h = hours), e.g., the default `1s` is one second.
+
+#### `-gzip` (type: `boolean`, default: `true`)
+
+Whether to encode writes to the server with gzip. For best performance, encoding
+with gzip is the best choice, but if the server does not support gzip or has it
+disabled, this flag should be set to false.
+
+---
+
+## `tsbs_run_queries_influx` Additional Flags
+
+### Database related
+
+#### `-chunk-response-size` (type: `int`, default: `0`)
+
+Number of series to return per response per query. If the query would generate
+a response that is very large, it could cause the server to crash with
+out-of-memory problems. This flag will chunk the response into multiple smaller
+responses to prevent the server from crashing. The default of 0 will return
+everything in a single response.
+
+#### `-urls` (type: `string`, default: `http://localhost:8086`)
+
+Comma-separated list of URLs to connect to for querying. Workers will be
+distributed in a round robin fashion across the URLs.
diff --git a/docs/mongo.md b/docs/mongo.md
new file mode 100644
index 000000000..75e050771
--- /dev/null
+++ b/docs/mongo.md
@@ -0,0 +1,80 @@
+# TSBS Supplemental Guide: MongoDB
+
+MongoDB is a general NoSQL database that stores data as JSON-like documents.
+This supplemental guide explains how the data generated for TSBS is stored, additional flags available when
+using the data importer (`tsbs_load_mongo`), and additional flags
+available for the query runner (`tsbs_run_queries_mongo`). **This
+should be read *after* the main README.**
+
+## Data format
+
+Data generated by `tsbs_generate_data` for MongoDB is serialized as a
+FlatBuffer to represent each reading. This format is not (easily) human readable
+in its serialized format; however, the FlatBuffer is specified as follows:
+```text
+// mongo.fbs
+namespace serialize;
+table MongoTag {
+  key:string;
+  value:string;
+}
+
+table MongoReading {
+  key:string;
+  value:double;
+}
+
+table MongoPoint {
+  measurementName:string;
+  timestamp:long;
+  tags:[MongoTag];
+  fields:[MongoReading];
+}
+
+root_type MongoPoint;
+```
+
+---
+
+## `tsbs_load_mongo` Additional Flags
+
+### Database related
+
+#### `-url` (type: `string`, default: `localhost:27017`)
+
+URL for connecting to the MongoDB server daemon.
+
+#### `-write-timeout` (type: `duration`, default: `10s`)
+
+Length of the timeout for writes.
+It is expressed as a Golang time.Duration string, meaning a number followed
+by a unit abbreviation (s = seconds,
+m = minutes, h = hours), e.g., the default `10s` is ten seconds.
+
+
+### Miscellaneous
+
+#### `-document-per-event` (type: `boolean`, default: `false`)
+
+Store each data reading as a separate document instead of the default aggregated
+format. The default aggregated format stores an hour's worth of readings for
+a particular device in one document and uses updates for a more efficient
+storage model. However for testing or comparing, this flag is provided to use
+a model where each data reading is stored as a single document.
+
+---
+
+## `tsbs_run_queries_mongo` Additional Flags
+
+### Database related
+
+#### `-url` (type: `string`, default: `localhost:27017`)
+
+URL for connecting to the MongoDB server daemon.
+
+#### `-read-timeout` (type: `duration`, default: `10s`)
+
+Length of the timeout for reads.
+It is expressed as a Golang time.Duration string, meaning a number followed
+by a unit abbreviation (s = seconds,
+m = minutes, h = hours), e.g., the default `10s` is ten seconds.
diff --git a/docs/timescaledb.md b/docs/timescaledb.md
new file mode 100644
index 000000000..81d29c117
--- /dev/null
+++ b/docs/timescaledb.md
@@ -0,0 +1,182 @@
+# TSBS Supplemental Guide: TimescaleDB
+
+TimescaleDB is a database built on top of PostgreSQL, utilizing a SQL
+interface and RDBMS characteristics. This supplemental guide explains how
+the data generated for TSBS is stored, additional flags available when
+using the data importer (`tsbs_load_timescaledb`), and additional flags
+available for the query runner (`tsbs_run_queries_timescaledb`). **This
+should be read *after* the main README.**
+
+## Data format
+
+Data generated by `tsbs_generate_data` for TimescaleDB is serialized in a
+"pseudo-CSV" format, along with a custom header at the beginning. The
+header is several lines long:
+* one line composed of a comma-separated list of tag labels, with the literal string `tags` as the first value in the list
+* one or more lines composed of a comma-separated list of field labels, with the hypertable name as the first value in the list
+* a blank line
+
+An example for the `cpu-only` use case:
+```text
+tags,hostname,region,datacenter,rack,os,arch,team,service,service_version,service_environment
+cpu,usage_user,usage_system,usage_idle,usage_nice,usage_iowait,usage_irq,usage_softirq,usage_steal,usage_guest,usage_guest_nice
+
+```
+
+Following this, each reading is composed of two rows:
+1. a comma-separated list of tag values for the reading, with the literal string `tags` as the first value in the list
+1. a comma-separated list of field values for the reading, with the hypertable the reading belongs to being the first value and the timestamp as the second value
+
+An example for the `cpu-only` use case:
+```text
+tags,host_0,eu-central-1,eu-central-1b,21,Ubuntu15.10,x86,SF,6,0,test
+cpu,1451606400000000000,58.1317132304976170,2.6224297271376256,24.9969495069947882,61.5854484633778867,22.9481393231639395,63.6499207106198313,6.4098777048301052,44.8799140503027445,80.5028770761136201,38.2431182911542820
+```
+
+---
+
+## `tsbs_load_timescaledb` Additional Flags
+
+
+### PostgreSQL related
+
+#### `-host` (type: `string`, default: `localhost`)
+
+Hostname of the PostgreSQL server.
+
+#### `-postgres` (type: `string`, default: `sslmode=disable`)
+
+Specifies any connection parameters to pass along as the client
+connects to the PostgreSQL server. Values for `dbname`, `host`, and `user`
+in the connection string will be overridden with the values from the flags
+`-db-name`, `-host`, and `-user`, respectively. See the
+[PostgreSQL documentation][conn-str] for more details.
+
+#### `-use-hypertable` (type: `boolean`, default: `true`)
+
+Whether to actually use TimescaleDB's hypertable for storing data. Set to
+`false` to measure the insert/write performance of plain PostgreSQL.
+
+#### `-user` (type: `string`, default: `postgres`)
+
+User to use to connect to the PostgreSQL server.
+
+### Tags related
+
+#### `-in-table-partition-tag` (type: `boolean`, default: `false`)
+Whether to store the primary key tag as a column in the hypertable. By
+default the hypertable just stores the primary key (an int) of the tag set
+associated with the reading in the table. However this option can be more
+costly for some queries because of the need to do a JOIN. This option will
+store the primary tag (first tag in the list of tags in the data format) in
+the hypertable so that if most queries are using that as a filter, query
+performance can be improved.
+
+#### `-use-jsonb-tags` (type: `boolean`, default: `false`)
+Whether to store the tags as a JSONB element in the tags table. By default
+tags are stored in separate columns in a metadata table named `tags`, where
+each unique set of tags is stored in a separate row. Using this option will
+store the tags as a JSONB element in `tags` instead. Write performance does not
+seem to be dramatically affected by this option, but query performance is
+typically better with non-JSONB tags so this defaults to `false`.
+
+
+### Hypertable related
+
+#### `-chunk-time` (type: `duration`, default: `12h`)
+Size of each time partition in terms of time. It is expressed as a Golang
+time.Duration string, meaning a number followed by a unit abbreviation
+(s = seconds, m = minutes, h = hours), e.g., the default `12h` is 12 hours.
+This should be adjusted based on the dataset size.
+
+#### `-partitions` (type: `int`, default: `1`)
+Number of space partitions for the primary tag. Increasing this from 1 may
+be useful for a larger number of devices, but further testing is still
+needed.
+
+
+### Index related
+
+#### `-field-index` (type: `string`, default: `VALUE-TIME`)
+The format for (any) field indexes, which are additional secondary indexes
+on fields in a hypertable. These are used for more performant threshold
+queries when the threshold is on a field rather than time, e.g.,
+`cpu.usage_user > 90`. The two valid options are:
+* `VALUE-TIME` which creates a compound index on `(<field>, time DESC)`
+* `TIME-VALUE` which creates a compound index on `(time DESC, <field>)`
+
+(`<field>` is replaced with the actual field name)
+
+#### `-field-index-count` (type: `int`, default: `0`)
+Number of secondary indexes to create on measurement fields, with `-1`
+signifying to create indexes on *all* fields. While secondary indexes can
+increase query performance, they will also increase disk usage and reduce
+write performance.
+
+#### `-partition-index` (type: `boolean`, default: `true`)
+Whether to create a compound index on the primary tag and time dimension
+(i.e., an index on `(tags_id, time DESC)`). Removing this index is likely
+to significantly reduce query performance.
+
+#### `-time-index` (type: `boolean`, default: `true`)
+Whether to create an index on the time dimension. For datasets with a smaller
+number of devices (i.e., <100k), this is usually recommended. For a larger
+number of devices, `-time-partition-index` is recommended instead.
+
+#### `-time-partition-index` (type: `boolean`, default: `false`)
+Whether to create a compound index on the time dimension and the primary
+tag (i.e., an index on `(time DESC, tags_id)`).
+For datasets with a larger number of devices (i.e., >100k), this is
+usually recommended because it increases write performance by
+reducing lock contention on nodes in the
+B-tree since they are additionally partitioned by `tags_id`.
+
+
+### Miscellaneous
+
+#### `-hash-workers` (type: `boolean`, default: `false`)
+Whether to consistently hash data across the multiple insert workers by the
+value of the primary (first) tag. For datasets with larger numbers of
+devices, this option helps improve data locality on disk which can lead
+to better query performance. For datasets with smaller numbers of devices, it is typically not necessary.
+
+#### `-write-profile` (type: `string`, default: none)
+File to output periodic CPU and memory statistics. Useful for understanding
+system performance while writing data to the database.
+
+#### `-write-replication-stats` (type: `string`, default: none)
+File to output replication statistics. Useful for understanding how long it
+takes for data to be written in a replicated setup.
+
+---
+
+## `tsbs_run_queries_timescaledb` Additional Flags
+
+### PostgreSQL related
+
+#### `-hosts` (type: `string`, default: `localhost`)
+
+Comma-separated list of hostnames for the PostgreSQL servers. Each server
+should contain a full copy/replica of the dataset. Workers are connected
+to a server in a round-robin fashion.
+
+#### `-postgres` (type: `string`, default: `sslmode=disable`)
+
+Specifies any connection parameters to pass along as the client
+connects to the PostgreSQL server. Values for `dbname`, `host`, and `user`
+in the connection string will be overridden with the values from the flags
+`-db-name`, `-hosts`, and `-user`, respectively. See the
+[PostgreSQL documentation][conn-str] for more details.
+
+#### `-show-explain` (type: `boolean`, default: `false`)
+
+Whether to print out a sample `EXPLAIN ANALYZE` output for the first query
+in the set of queries. This will be the only query run and is useful for
+understanding the query plan that is being generated for a particular
+query type.
+
+#### `-user` (type: `string`, default: `postgres`)
+
+User to use to connect to the PostgreSQL server(s).
+
+[conn-str]: https://www.postgresql.org/docs/10/static/libpq-connect.html