diff --git a/CHANGELOG.md b/CHANGELOG.md index 52833472abde0..d1c165c09365b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ### Features +* [1686](https://github.com/grafana/loki/pull/1686) **owen-d**: Introduces the `distributor.max-line-size` flag and associated yaml config. When enabled, lines longer than this config will not be accepted. * [1662](https://github.com/grafana/loki/pull/1662) **owen-d**: Introduces binary operators in LogQL * [1572](https://github.com/grafana/loki/pull/1572) **owen-d**: Introduces the `querier.query-ingesters-within` flag and associated yaml config. When enabled, queries for a time range that do not overlap this lookback interval will not be sent to the ingesters. * [1558](https://github.com/grafana/loki/pull/1558) **owen-d**: Introduces `ingester.max-chunk-age` which specifies the maximum chunk age before it's cut. diff --git a/docs/configuration/README.md b/docs/configuration/README.md index dc36446c77298..a5fe98c09fd9c 100644 --- a/docs/configuration/README.md +++ b/docs/configuration/README.md @@ -781,6 +781,10 @@ logs in Loki. # Maximum number of active streams per user, per ingester. 0 to disable. [max_streams_per_user: | default = 10000] +# Maximum line size on ingestion path. Example: 256kb. +# There is no limit when unset. +[max_line_size: | default = none ] + # Maximum number of active streams per user, across the cluster. 0 to disable. 
# When the global limit is enabled, each ingester is configured with a dynamic # local limit based on the replication factor and the current number of healthy diff --git a/go.mod b/go.mod index 0497757096b59..9a48d1aad680d 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/Microsoft/go-winio v0.4.12 // indirect github.com/blang/semver v3.5.1+incompatible // indirect github.com/bmatcuk/doublestar v1.2.2 + github.com/c2h5oh/datasize v0.0.0-20200112174442-28bbd4740fee github.com/containerd/containerd v1.3.2 // indirect github.com/containerd/fifo v0.0.0-20190226154929-a9fb20d87448 // indirect github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e diff --git a/go.sum b/go.sum index 8291a2968d452..ffc32fa5f1361 100644 --- a/go.sum +++ b/go.sum @@ -128,6 +128,8 @@ github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4Yn github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/bradfitz/gomemcache v0.0.0-20190913173617-a41fca850d0b h1:L/QXpzIa3pOvUGt1D1lA5KjYhPBAN/3iWdP7xeFS9F0= github.com/bradfitz/gomemcache v0.0.0-20190913173617-a41fca850d0b/go.mod h1:H0wQNHz2YrLsuXOZozoeDmnHXkNCRmMW0gwFWDfEZDA= +github.com/c2h5oh/datasize v0.0.0-20200112174442-28bbd4740fee h1:BnPxIde0gjtTnc9Er7cxvBk8DHLWhEux0SxayC8dP6I= +github.com/c2h5oh/datasize v0.0.0-20200112174442-28bbd4740fee/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M= github.com/cenkalti/backoff v0.0.0-20181003080854-62661b46c409/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/cenkalti/backoff v1.0.0/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/census-instrumentation/opencensus-proto v0.2.1 h1:glEXhBS5PSLLv4IXzLA5yPRVX4bilULVyxxbrfOtDAk= diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index 653daeebb5306..28c0e9752032d 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -12,7 +12,6 @@ import ( 
"github.com/cortexproject/cortex/pkg/ring" cortex_util "github.com/cortexproject/cortex/pkg/util" "github.com/cortexproject/cortex/pkg/util/limiter" - cortex_validation "github.com/cortexproject/cortex/pkg/util/validation" "github.com/go-kit/kit/log/level" "github.com/opentracing/opentracing-go" @@ -76,7 +75,7 @@ type Distributor struct { cfg Config clientCfg client.Config ingestersRing ring.ReadRing - overrides *validation.Overrides + validator *Validator pool *cortex_client.Pool // The global rate limiter requires a distributors ring to count @@ -96,6 +95,11 @@ func New(cfg Config, clientCfg client.Config, ingestersRing ring.ReadRing, overr } } + validator, err := NewValidator(overrides) + if err != nil { + return nil, err + } + // Create the configured ingestion rate limit strategy (local or global). var ingestionRateStrategy limiter.RateLimiterStrategy var distributorsRing *ring.Lifecycler @@ -119,7 +123,7 @@ func New(cfg Config, clientCfg client.Config, ingestersRing ring.ReadRing, overr clientCfg: clientCfg, ingestersRing: ingestersRing, distributorsRing: distributorsRing, - overrides: overrides, + validator: validator, pool: cortex_client.NewPool(clientCfg.PoolConfig, ingestersRing, factory, cortex_util.Logger), ingestionRateLimiter: limiter.NewRateLimiter(ingestionRateStrategy, 10*time.Second), } @@ -194,16 +198,14 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log validatedSamplesCount := 0 for _, stream := range req.Streams { - if err := d.validateLabels(userID, stream.Labels); err != nil { + if err := d.validator.ValidateLabels(userID, stream.Labels); err != nil { validationErr = err continue } entries := make([]logproto.Entry, 0, len(stream.Entries)) for _, entry := range stream.Entries { - if err := cortex_validation.ValidateSample(d.overrides, userID, metricName, cortex_client.Sample{ - TimestampMs: entry.Timestamp.UnixNano() / int64(time.Millisecond), - }); err != nil { + if err := d.validator.ValidateEntry(userID, 
entry); err != nil { validationErr = err continue } @@ -281,16 +283,6 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log } } -func (d *Distributor) validateLabels(userID, labels string) error { - ls, err := util.ToClientLabels(labels) - if err != nil { - return httpgrpc.Errorf(http.StatusBadRequest, err.Error()) - } - - // everything in `ValidateLabels` returns `httpgrpc.Errorf` errors, no sugaring needed - return cortex_validation.ValidateLabels(d.overrides, userID, ls) -} - // TODO taken from Cortex, see if we can refactor out an usable interface. func (d *Distributor) sendSamples(ctx context.Context, ingester ring.IngesterDesc, streamTrackers []*streamTracker, pushTracker *pushTracker) { err := d.sendSamplesErr(ctx, ingester, streamTrackers) diff --git a/pkg/distributor/distributor_test.go b/pkg/distributor/distributor_test.go index 1627274f3650a..2737348c27e34 100644 --- a/pkg/distributor/distributor_test.go +++ b/pkg/distributor/distributor_test.go @@ -26,6 +26,7 @@ import ( "github.com/grafana/loki/pkg/ingester/client" "github.com/grafana/loki/pkg/logproto" + fe "github.com/grafana/loki/pkg/util/flagext" "github.com/grafana/loki/pkg/util/validation" ) @@ -43,6 +44,8 @@ func TestDistributor(t *testing.T) { for i, tc := range []struct { lines int + maxLineSize uint64 + mangleLabels bool expectedResponse *logproto.PushResponse expectedError error }{ @@ -54,6 +57,18 @@ func TestDistributor(t *testing.T) { lines: 100, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, "ingestion rate limit (100 bytes) exceeded while adding 100 lines for a total size of 1000 bytes"), }, + { + lines: 100, + maxLineSize: 1, + expectedResponse: success, + expectedError: httpgrpc.Errorf(http.StatusBadRequest, "max line size (1B) exceeded while adding (10B) size line"), + }, + { + lines: 100, + mangleLabels: true, + expectedResponse: success, + expectedError: httpgrpc.Errorf(http.StatusBadRequest, "parse error at line 1, col 4: literal not 
terminated"), + }, } { t.Run(fmt.Sprintf("[%d](samples=%v)", i, tc.lines), func(t *testing.T) { limits := &validation.Limits{} @@ -61,10 +76,16 @@ func TestDistributor(t *testing.T) { limits.EnforceMetricName = false limits.IngestionRateMB = ingestionRateLimit limits.IngestionBurstSizeMB = ingestionRateLimit + limits.MaxLineSize = fe.ByteSize(tc.maxLineSize) d := prepare(t, limits, nil) request := makeWriteRequest(tc.lines, 10) + + if tc.mangleLabels { + request.Streams[0].Labels = `{ab"` + } + response, err := d.Push(ctx, request) assert.Equal(t, tc.expectedResponse, response) assert.Equal(t, tc.expectedError, err) diff --git a/pkg/distributor/limits.go b/pkg/distributor/limits.go new file mode 100644 index 0000000000000..520cc514fe4a3 --- /dev/null +++ b/pkg/distributor/limits.go @@ -0,0 +1,16 @@ +package distributor + +import "time" + +// Limits is an interface for distributor limits/related configs +type Limits interface { + MaxLineSize(userID string) int + EnforceMetricName(userID string) bool + MaxLabelNamesPerSeries(userID string) int + MaxLabelNameLength(userID string) int + MaxLabelValueLength(userID string) int + + CreationGracePeriod(userID string) time.Duration + RejectOldSamples(userID string) bool + RejectOldSamplesMaxAge(userID string) time.Duration +} diff --git a/pkg/distributor/validator.go b/pkg/distributor/validator.go new file mode 100644 index 0000000000000..9e7510b985136 --- /dev/null +++ b/pkg/distributor/validator.go @@ -0,0 +1,62 @@ +package distributor + +import ( + "errors" + "net/http" + "time" + + cortex_client "github.com/cortexproject/cortex/pkg/ingester/client" + cortex_validation "github.com/cortexproject/cortex/pkg/util/validation" + "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/flagext" + "github.com/weaveworks/common/httpgrpc" +) + +type Validator struct { + Limits +} + +func NewValidator(l Limits) (*Validator, error) { + if l == nil { + return nil, 
errors.New("nil Limits") + } + return &Validator{l}, nil +} + +// ValidateEntry returns an error if the entry is invalid +func (v Validator) ValidateEntry(userID string, entry logproto.Entry) error { + if err := cortex_validation.ValidateSample(v, userID, metricName, cortex_client.Sample{ + TimestampMs: entry.Timestamp.UnixNano() / int64(time.Millisecond), + }); err != nil { + return err + } + + if maxSize := v.MaxLineSize(userID); maxSize != 0 && len(entry.Line) > maxSize { + // I wish we didn't return httpgrpc errors here as it seems + // an orthogonal concept (we need not use ValidateLabels in this context) + // but the upstream cortex_validation pkg uses it, so we keep this + // for parity. + return httpgrpc.Errorf( + http.StatusBadRequest, + "max line size (%s) exceeded while adding (%s) size line", + flagext.ByteSize(uint64(maxSize)).String(), + flagext.ByteSize(uint64(len(entry.Line))).String(), + ) + } + + return nil +} + +// ValidateLabels returns an error if the labels are invalid +func (v Validator) ValidateLabels(userID string, labels string) error { + ls, err := util.ToClientLabels(labels) + if err != nil { + // I wish we didn't return httpgrpc errors here as it seems + // an orthogonal concept (we need not use ValidateLabels in this context) + // but the upstream cortex_validation pkg uses it, so we keep this + // for parity. + return httpgrpc.Errorf(http.StatusBadRequest, err.Error()) + } + return cortex_validation.ValidateLabels(v, userID, ls) +} diff --git a/pkg/util/flagext/bytesize.go b/pkg/util/flagext/bytesize.go new file mode 100644 index 0000000000000..76c61d69d413b --- /dev/null +++ b/pkg/util/flagext/bytesize.go @@ -0,0 +1,46 @@ +package flagext + +import ( + "strings" + + "github.com/c2h5oh/datasize" +) + +// ByteSize is a flag parsing compatibility type for constructing human friendly sizes. +// It implements flag.Value & flag.Getter. 
+type ByteSize uint64 + +func (bs ByteSize) String() string { + return datasize.ByteSize(bs).String() +} + +func (bs *ByteSize) Set(s string) error { + var v datasize.ByteSize + + // Bytesize currently doesn't handle things like Mb, but only handles MB. + // Therefore we capitalize just for convenience + if err := v.UnmarshalText([]byte(strings.ToUpper(s))); err != nil { + return err + } + *bs = ByteSize(v.Bytes()) + return nil +} + +func (bs ByteSize) Get() interface{} { + return bs.Val() +} + +func (bs ByteSize) Val() int { + return int(bs) +} + +/// UnmarshalYAML the Unmarshaler interface of the yaml pkg. +func (bs *ByteSize) UnmarshalYAML(unmarshal func(interface{}) error) error { + var str string + err := unmarshal(&str) + if err != nil { + return err + } + + return bs.Set(str) +} diff --git a/pkg/util/flagext/bytesize_test.go b/pkg/util/flagext/bytesize_test.go new file mode 100644 index 0000000000000..95331af97797d --- /dev/null +++ b/pkg/util/flagext/bytesize_test.go @@ -0,0 +1,104 @@ +package flagext + +import ( + "testing" + + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v2" +) + +func Test_ByteSize(t *testing.T) { + for _, tc := range []struct { + in string + err bool + out int + }{ + { + in: "abc", + err: true, + }, + { + in: "", + err: false, + out: 0, + }, + { + in: "0", + err: false, + out: 0, + }, + { + in: "1b", + err: false, + out: 1, + }, + { + in: "100kb", + err: false, + out: 100 << 10, + }, + { + in: "100 KB", + err: false, + out: 100 << 10, + }, + { + // ensure lowercase works + in: "50mb", + err: false, + out: 50 << 20, + }, + { + // ensure mixed capitalization works + in: "50Mb", + err: false, + out: 50 << 20, + }, + { + in: "256GB", + err: false, + out: 256 << 30, + }, + } { + t.Run(tc.in, func(t *testing.T) { + var bs ByteSize + + err := bs.Set(tc.in) + if tc.err { + require.NotNil(t, err) + } else { + require.Nil(t, err) + require.Equal(t, tc.out, bs.Get().(int)) + } + + }) + } +} + +func Test_ByteSizeYAML(t *testing.T) { + for 
_, tc := range []struct { + in string + err bool + out ByteSize + }{ + { + in: "256GB", + out: ByteSize(256 << 30), + }, + { + in: "abc", + err: true, + }, + } { + t.Run(tc.in, func(t *testing.T) { + var out ByteSize + err := yaml.Unmarshal([]byte(tc.in), &out) + if tc.err { + require.NotNil(t, err) + } else { + require.Nil(t, err) + require.Equal(t, tc.out, out) + } + }) + } +} diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index a14884572f798..34ed39745dfeb 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -3,6 +3,8 @@ package validation import ( "flag" "time" + + "github.com/grafana/loki/pkg/util/flagext" ) const ( @@ -19,16 +21,17 @@ const ( // limits via flags, or per-user limits via yaml config. type Limits struct { // Distributor enforced limits. - IngestionRateStrategy string `yaml:"ingestion_rate_strategy"` - IngestionRateMB float64 `yaml:"ingestion_rate_mb"` - IngestionBurstSizeMB float64 `yaml:"ingestion_burst_size_mb"` - MaxLabelNameLength int `yaml:"max_label_name_length"` - MaxLabelValueLength int `yaml:"max_label_value_length"` - MaxLabelNamesPerSeries int `yaml:"max_label_names_per_series"` - RejectOldSamples bool `yaml:"reject_old_samples"` - RejectOldSamplesMaxAge time.Duration `yaml:"reject_old_samples_max_age"` - CreationGracePeriod time.Duration `yaml:"creation_grace_period"` - EnforceMetricName bool `yaml:"enforce_metric_name"` + IngestionRateStrategy string `yaml:"ingestion_rate_strategy"` + IngestionRateMB float64 `yaml:"ingestion_rate_mb"` + IngestionBurstSizeMB float64 `yaml:"ingestion_burst_size_mb"` + MaxLabelNameLength int `yaml:"max_label_name_length"` + MaxLabelValueLength int `yaml:"max_label_value_length"` + MaxLabelNamesPerSeries int `yaml:"max_label_names_per_series"` + RejectOldSamples bool `yaml:"reject_old_samples"` + RejectOldSamplesMaxAge time.Duration `yaml:"reject_old_samples_max_age"` + CreationGracePeriod time.Duration `yaml:"creation_grace_period"` + 
EnforceMetricName bool `yaml:"enforce_metric_name"` + MaxLineSize flagext.ByteSize `yaml:"max_line_size"` // Ingester enforced limits. MaxLocalStreamsPerUser int `yaml:"max_streams_per_user"` @@ -55,6 +58,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.StringVar(&l.IngestionRateStrategy, "distributor.ingestion-rate-limit-strategy", "local", "Whether the ingestion rate limit should be applied individually to each distributor instance (local), or evenly shared across the cluster (global).") f.Float64Var(&l.IngestionRateMB, "distributor.ingestion-rate-limit-mb", 4, "Per-user ingestion rate limit in sample size per second. Units in MB.") f.Float64Var(&l.IngestionBurstSizeMB, "distributor.ingestion-burst-size-mb", 6, "Per-user allowed ingestion burst size (in sample size). Units in MB.") + f.Var(&l.MaxLineSize, "distributor.max-line-size", "maximum line length allowed, i.e. 100mb. Default (0) means unlimited.") f.IntVar(&l.MaxLabelNameLength, "validation.max-length-label-name", 1024, "Maximum length accepted for label names") f.IntVar(&l.MaxLabelValueLength, "validation.max-length-label-value", 2048, "Maximum length accepted for label value. This setting also applies to the metric name") f.IntVar(&l.MaxLabelNamesPerSeries, "validation.max-label-names-per-series", 30, "Maximum number of label names per series.") @@ -227,6 +231,11 @@ func (o *Overrides) MaxConcurrentTailRequests(userID string) int { return o.getOverridesForUser(userID).MaxConcurrentTailRequests } +// MaxLineSize returns the maximum size in bytes the distributor should allow. 
+func (o *Overrides) MaxLineSize(userID string) int { + return o.getOverridesForUser(userID).MaxLineSize.Val() +} + func (o *Overrides) getOverridesForUser(userID string) *Limits { if o.tenantLimits != nil { l := o.tenantLimits(userID) diff --git a/vendor/github.com/c2h5oh/datasize/.gitignore b/vendor/github.com/c2h5oh/datasize/.gitignore new file mode 100644 index 0000000000000..daf913b1b347a --- /dev/null +++ b/vendor/github.com/c2h5oh/datasize/.gitignore @@ -0,0 +1,24 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof diff --git a/vendor/github.com/c2h5oh/datasize/.travis.yml b/vendor/github.com/c2h5oh/datasize/.travis.yml new file mode 100644 index 0000000000000..204e70089fe08 --- /dev/null +++ b/vendor/github.com/c2h5oh/datasize/.travis.yml @@ -0,0 +1,14 @@ +sudo: false + +language: go +go: + - 1.4 + - 1.5 + - 1.6 + - 1.7 + - 1.8 + - 1.9 + - tip + +script: + - go test -v diff --git a/vendor/github.com/c2h5oh/datasize/LICENSE b/vendor/github.com/c2h5oh/datasize/LICENSE new file mode 100644 index 0000000000000..f2ba916e612ca --- /dev/null +++ b/vendor/github.com/c2h5oh/datasize/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Maciej Lisiewski + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/c2h5oh/datasize/README.md b/vendor/github.com/c2h5oh/datasize/README.md new file mode 100644 index 0000000000000..ac0cf8586e9f2 --- /dev/null +++ b/vendor/github.com/c2h5oh/datasize/README.md @@ -0,0 +1,77 @@ +# datasize [![Build Status](https://travis-ci.org/c2h5oh/datasize.svg?branch=master)](https://travis-ci.org/c2h5oh/datasize) + +Golang helpers for data sizes + +### Constants + +Just like `time` package provides `time.Second`, `time.Day` constants `datasize` provides: + +* `datasize.B` 1 byte +* `datasize.KB` 1 kilobyte +* `datasize.MB` 1 megabyte +* `datasize.GB` 1 gigabyte +* `datasize.TB` 1 terabyte +* `datasize.PB` 1 petabyte +* `datasize.EB` 1 exabyte + +### Helpers + +Just like `time` package provides `duration.Nanoseconds() uint64 `, `duration.Hours() float64` helpers `datasize` has. 
+ +* `ByteSize.Bytes() uint64` +* `ByteSize.Kilobytes() float4` +* `ByteSize.Megabytes() float64` +* `ByteSize.Gigabytes() float64` +* `ByteSize.Terabytes() float64` +* `ByteSize.Petebytes() float64` +* `ByteSize.Exabytes() float64` + +Warning: see limitations at the end of this document about a possible precission loss + +### Parsing strings + +`datasize.ByteSize` implements `TextUnmarshaler` interface and will automatically parse human readable strings into correct values where it is used: + +* `"10 MB"` -> `10* datasize.MB` +* `"10240 g"` -> `10 * datasize.TB` +* `"2000"` -> `2000 * datasize.B` +* `"1tB"` -> `datasize.TB` +* `"5 peta"` -> `5 * datasize.PB` +* `"28 kilobytes"` -> `28 * datasize.KB` +* `"1 gigabyte"` -> `1 * datasize.GB` + +You can also do it manually: + +```go +var v datasize.ByteSize +err := v.UnmarshalText([]byte("100 mb")) +``` + +### Printing + +`Bytesize.String()` uses largest unit allowing an integer value: + +* `(102400 * datasize.MB).String()` -> `"100GB"` +* `(datasize.MB + datasize.KB).String()` -> `"1025KB"` + +Use `%d` format string to get value in bytes without a unit. + +### JSON and other encoding + +Both `TextMarshaler` and `TextUnmarshaler` interfaces are implemented - JSON will just work. Other encoders will work provided they use those interfaces. 
+ +### Human readable + +`ByteSize.HumanReadable()` or `ByteSize.HR()` returns a string with 1-3 digits, followed by 1 decimal place, a space and unit big enough to get 1-3 digits + +* `(102400 * datasize.MB).String()` -> `"100.0 GB"` +* `(datasize.MB + 512 * datasize.KB).String()` -> `"1.5 MB"` + +### Limitations + +* The underlying data type for `data.ByteSize` is `uint64`, so values outside of 0 to 2^64-1 range will overflow +* size helper functions (like `ByteSize.Kilobytes()`) return `float64`, which can't represent all possible values of `uint64` accurately: + * if the returned value is supposed to have no fraction (ie `(10 * datasize.MB).Kilobytes()`) accuracy loss happens when value is more than 2^53 larger than unit: `.Kilobytes()` over 8 petabytes, `.Megabytes()` over 8 exabytes + * if the returned value is supposed to have a fraction (ie `(datasize.PB + datasize.B).Megabytes()`) in addition to the above note accuracy loss may occur in fractional part too - larger integer part leaves fewer bytes to store fractional part, the smaller the remainder vs unit the move bytes are required to store the fractional part +* Parsing a string with `Mb`, `Tb`, etc units will return a syntax error, because capital followed by lower case is commonly used for bits, not bytes +* Parsing a string with value exceeding 2^64-1 bytes will return 2^64-1 and an out of range error diff --git a/vendor/github.com/c2h5oh/datasize/datasize.go b/vendor/github.com/c2h5oh/datasize/datasize.go new file mode 100644 index 0000000000000..6754788162496 --- /dev/null +++ b/vendor/github.com/c2h5oh/datasize/datasize.go @@ -0,0 +1,217 @@ +package datasize + +import ( + "errors" + "fmt" + "strconv" + "strings" +) + +type ByteSize uint64 + +const ( + B ByteSize = 1 + KB = B << 10 + MB = KB << 10 + GB = MB << 10 + TB = GB << 10 + PB = TB << 10 + EB = PB << 10 + + fnUnmarshalText string = "UnmarshalText" + maxUint64 uint64 = (1 << 64) - 1 + cutoff uint64 = maxUint64 / 10 +) + +var ErrBits = 
errors.New("unit with capital unit prefix and lower case unit (b) - bits, not bytes ") + +func (b ByteSize) Bytes() uint64 { + return uint64(b) +} + +func (b ByteSize) KBytes() float64 { + v := b / KB + r := b % KB + return float64(v) + float64(r)/float64(KB) +} + +func (b ByteSize) MBytes() float64 { + v := b / MB + r := b % MB + return float64(v) + float64(r)/float64(MB) +} + +func (b ByteSize) GBytes() float64 { + v := b / GB + r := b % GB + return float64(v) + float64(r)/float64(GB) +} + +func (b ByteSize) TBytes() float64 { + v := b / TB + r := b % TB + return float64(v) + float64(r)/float64(TB) +} + +func (b ByteSize) PBytes() float64 { + v := b / PB + r := b % PB + return float64(v) + float64(r)/float64(PB) +} + +func (b ByteSize) EBytes() float64 { + v := b / EB + r := b % EB + return float64(v) + float64(r)/float64(EB) +} + +func (b ByteSize) String() string { + switch { + case b == 0: + return fmt.Sprint("0B") + case b%EB == 0: + return fmt.Sprintf("%dEB", b/EB) + case b%PB == 0: + return fmt.Sprintf("%dPB", b/PB) + case b%TB == 0: + return fmt.Sprintf("%dTB", b/TB) + case b%GB == 0: + return fmt.Sprintf("%dGB", b/GB) + case b%MB == 0: + return fmt.Sprintf("%dMB", b/MB) + case b%KB == 0: + return fmt.Sprintf("%dKB", b/KB) + default: + return fmt.Sprintf("%dB", b) + } +} + +func (b ByteSize) HR() string { + return b.HumanReadable() +} + +func (b ByteSize) HumanReadable() string { + switch { + case b > EB: + return fmt.Sprintf("%.1f EB", b.EBytes()) + case b > PB: + return fmt.Sprintf("%.1f PB", b.PBytes()) + case b > TB: + return fmt.Sprintf("%.1f TB", b.TBytes()) + case b > GB: + return fmt.Sprintf("%.1f GB", b.GBytes()) + case b > MB: + return fmt.Sprintf("%.1f MB", b.MBytes()) + case b > KB: + return fmt.Sprintf("%.1f KB", b.KBytes()) + default: + return fmt.Sprintf("%d B", b) + } +} + +func (b ByteSize) MarshalText() ([]byte, error) { + return []byte(b.String()), nil +} + +func (b *ByteSize) UnmarshalText(t []byte) error { + var val uint64 + var unit 
string + + // copy for error message + t0 := t + + var c byte + var i int + +ParseLoop: + for i < len(t) { + c = t[i] + switch { + case '0' <= c && c <= '9': + if val > cutoff { + goto Overflow + } + + c = c - '0' + val *= 10 + + if val > val+uint64(c) { + // val+v overflows + goto Overflow + } + val += uint64(c) + i++ + + default: + if i == 0 { + goto SyntaxError + } + break ParseLoop + } + } + + unit = strings.TrimSpace(string(t[i:])) + switch unit { + case "Kb", "Mb", "Gb", "Tb", "Pb", "Eb": + goto BitsError + } + unit = strings.ToLower(unit) + switch unit { + case "", "b", "byte": + // do nothing - already in bytes + + case "k", "kb", "kilo", "kilobyte", "kilobytes": + if val > maxUint64/uint64(KB) { + goto Overflow + } + val *= uint64(KB) + + case "m", "mb", "mega", "megabyte", "megabytes": + if val > maxUint64/uint64(MB) { + goto Overflow + } + val *= uint64(MB) + + case "g", "gb", "giga", "gigabyte", "gigabytes": + if val > maxUint64/uint64(GB) { + goto Overflow + } + val *= uint64(GB) + + case "t", "tb", "tera", "terabyte", "terabytes": + if val > maxUint64/uint64(TB) { + goto Overflow + } + val *= uint64(TB) + + case "p", "pb", "peta", "petabyte", "petabytes": + if val > maxUint64/uint64(PB) { + goto Overflow + } + val *= uint64(PB) + + case "E", "EB", "e", "eb", "eB": + if val > maxUint64/uint64(EB) { + goto Overflow + } + val *= uint64(EB) + + default: + goto SyntaxError + } + + *b = ByteSize(val) + return nil + +Overflow: + *b = ByteSize(maxUint64) + return &strconv.NumError{fnUnmarshalText, string(t0), strconv.ErrRange} + +SyntaxError: + *b = 0 + return &strconv.NumError{fnUnmarshalText, string(t0), strconv.ErrSyntax} + +BitsError: + *b = 0 + return &strconv.NumError{fnUnmarshalText, string(t0), ErrBits} +} diff --git a/vendor/modules.txt b/vendor/modules.txt index e90b275ce54fe..86227e4a8b7e4 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -111,6 +111,8 @@ github.com/blang/semver github.com/bmatcuk/doublestar # 
github.com/bradfitz/gomemcache v0.0.0-20190913173617-a41fca850d0b github.com/bradfitz/gomemcache/memcache +# github.com/c2h5oh/datasize v0.0.0-20200112174442-28bbd4740fee +github.com/c2h5oh/datasize # github.com/cespare/xxhash v1.1.0 github.com/cespare/xxhash # github.com/cespare/xxhash/v2 v2.1.1