Add s2 commandline tools (#160)
klauspost authored Sep 14, 2019
1 parent 77b2d8a commit 72edc79
Showing 4 changed files with 365 additions and 1 deletion.
1 change: 1 addition & 0 deletions .travis.yml
@@ -21,6 +21,7 @@ script:
- go test -v -cpu=2 ./...
- go test -cpu=2 -tags=noasm ./...
- go test -cpu=1,4 -short -race ./...
- go build github.com/klauspost/compress/s2/cmd/s2c && go build github.com/klauspost/compress/s2/cmd/s2d && s2c && s2d
- GOOS=linux GOARCH=386 go install ./...

matrix:
64 changes: 63 additions & 1 deletion s2/README.md
@@ -72,7 +72,69 @@ Similar to the Writer, a Reader can be reused using the `Reset` method.
For smaller data blocks, there is also a non-streaming interface: `Encode()`, `EncodeBetter()` and `Decode()`.
Do, however, note that these functions (similar to Snappy) do not provide validation of data,
so data corruption may go undetected. Stream encoding provides CRC checks of the data.
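
As a rough sketch (the `roundTrip` helper is only illustrative), a block can be compressed and decompressed like this:

```Go
import "github.com/klauspost/compress/s2"

// roundTrip compresses a block and decompresses it again.
func roundTrip(data []byte) ([]byte, error) {
	// Passing nil as dst lets Encode and Decode allocate suitable buffers.
	compressed := s2.Encode(nil, data)
	// Note: block decoding has no CRC validation, unlike the stream format.
	return s2.Decode(nil, compressed)
}
```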


# Commandline tools

Some very simple commandline tools are provided: `s2c` for compression and `s2d` for decompression.

Installation requires Go to be installed. To install the tools, use:

`go install github.com/klauspost/compress/s2/cmd/s2c && go install github.com/klauspost/compress/s2/cmd/s2d`

To build binaries into the current folder, use:

`go build github.com/klauspost/compress/s2/cmd/s2c && go build github.com/klauspost/compress/s2/cmd/s2d`


## s2c

```
Usage: s2c [options] file1 file2
Compresses all files supplied as input separately.
Output files are written as 'filename.ext.s2'.
By default output files will be overwritten.
Use - as the only file name to read from stdin and write to stdout.
Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
Options:
-blocksize string
Max block size. Examples: 64K, 256K, 1M, 4M. Must be a power of two and <= 4MB (default "1M")
-c Write all output to stdout. Multiple input files will be concatenated.
-cpu int
Compress using this many threads (default Auto)
-faster
Compress faster, but with a minor compression loss
-help
Display help
-pad string
Pad size to a multiple of this value. Examples: 64K, 256K, 1M, 4M, etc. (default "1")
-safe
Do not overwrite output files
```

## s2d

```
Usage: s2d [options] file1 file2
Decompresses all files supplied as input. Input files must end with '.s2' or '.snappy'.
Output file names have the extension removed. By default output files will be overwritten.
Use - as the only file name to read from stdin and write to stdout.
Wildcards are accepted: testdir/*.s2 will decompress all files in testdir ending with .s2
Directories can be wildcards as well. testdir/*/*.s2 will match testdir/subdir/b.s2
Options:
-c Write all output to stdout. Multiple input files will be concatenated.
-help
Display help
-safe
Do not overwrite output files
```

# Performance

This section will focus on comparisons to Snappy.
169 changes: 169 additions & 0 deletions s2/cmd/s2c/main.go
@@ -0,0 +1,169 @@
package main

import (
"bufio"
"errors"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
"unicode"

"github.com/klauspost/compress/s2"
)

var (
faster = flag.Bool("faster", false, "Compress faster, but with a minor compression loss")
cpu = flag.Int("cpu", runtime.GOMAXPROCS(0), "Compress using this many threads")
blockSize = flag.String("blocksize", "1M", "Max block size. Examples: 64K, 256K, 1M, 4M. Must be a power of two and <= 4MB")
safe = flag.Bool("safe", false, "Do not overwrite output files")
padding = flag.String("pad", "1", "Pad size to a multiple of this value. Examples: 64K, 256K, 1M, 4M, etc.")
stdout = flag.Bool("c", false, "Write all output to stdout. Multiple input files will be concatenated.")
help = flag.Bool("help", false, "Display help")
)

func main() {
flag.Parse()
sz, err := toSize(*blockSize)
exitErr(err)
pad, err := toSize(*padding)
exitErr(err)

args := flag.Args()
if len(args) == 0 || *help {
fmt.Println(`Usage: s2c [options] file1 file2
Compresses all files supplied as input separately.
Output files are written as 'filename.ext.s2'.
By default output files will be overwritten.
Use - as the only file name to read from stdin and write to stdout.
Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
Options:`)
flag.PrintDefaults()
os.Exit(0) // exit after showing usage
}
opts := []s2.WriterOption{s2.WriterBlockSize(int(sz)), s2.WriterConcurrency(*cpu), s2.WriterPadding(int(pad))}
if !*faster {
opts = append(opts, s2.WriterBetterCompression())
}
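// The Writer is reused for every input via Reset.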
wr := s2.NewWriter(nil, opts...)

// A single "-" argument: read from stdin and write to stdout.
if len(args) == 1 && args[0] == "-" {
wr.Reset(os.Stdout)
_, err := io.Copy(wr, os.Stdin)
exitErr(err)
exitErr(wr.Close())
return
}
var files []string

for _, pattern := range args {
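// Each argument is treated as a glob pattern; a pattern matching nothing is fatal.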
found, err := filepath.Glob(pattern)
exitErr(err)
if len(found) == 0 {
exitErr(fmt.Errorf("unable to find file %v", pattern))
}
files = append(files, found...)
}

for _, filename := range files {
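// Compress each file inside a closure so the deferred Close/Flush calls run per iteration.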
func() {
dstFilename := fmt.Sprintf("%s%s", filename, ".s2")
if !*stdout {
fmt.Println("Compressing", filename, "->", dstFilename)
}
// Input file.
file, err := os.Open(filename)
exitErr(err)
defer file.Close()
src := bufio.NewReaderSize(file, int(sz)*2)
finfo, err := file.Stat()
exitErr(err)
var out io.Writer
if *stdout {
out = os.Stdout
} else {
mode := finfo.Mode() // use the same mode for the output file
if *safe {
_, err := os.Stat(dstFilename)
if !os.IsNotExist(err) {
exitErr(errors.New("destination file exists"))
}
}
dstFile, err := os.OpenFile(dstFilename, os.O_CREATE|os.O_WRONLY, mode)
exitErr(err)
defer dstFile.Close()
bw := bufio.NewWriterSize(dstFile, int(sz)*2)
defer bw.Flush()
out = bw
}
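// Count the compressed bytes written so the size and ratio can be reported.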
wc := wCounter{out: out}
wr.Reset(&wc)
defer wr.Close()
start := time.Now()
input, err := wr.ReadFrom(src)
exitErr(err)
err = wr.Close()
exitErr(err)
if !*stdout {
elapsed := time.Since(start)
mbpersec := (float64(input) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second)))
pct := float64(wc.n) * 100 / float64(input)
fmt.Printf("%d -> %d [%.02f%%]; %dMB/s\n", input, wc.n, pct, int(mbpersec))
}
}()
}
}

func exitErr(err error) {
if err != nil {
fmt.Fprintln(os.Stderr, "ERROR:", err.Error())
os.Exit(2)
}
}

// toSize converts a size indication to bytes.
func toSize(size string) (uint64, error) {
size = strings.ToUpper(strings.TrimSpace(size))
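// Find where the unit suffix (e.g. K, M) starts; no letter means plain bytes.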
firstLetter := strings.IndexFunc(size, unicode.IsLetter)
if firstLetter == -1 {
firstLetter = len(size)
}

bytesString, multiple := size[:firstLetter], size[firstLetter:]
bytes, err := strconv.ParseUint(bytesString, 10, 64)
if err != nil {
return 0, fmt.Errorf("unable to parse size: %v", err)
}

switch multiple {
case "M", "MB", "MIB":
return bytes * 1 << 20, nil
case "K", "KB", "KIB":
return bytes * 1 << 10, nil
case "B", "":
return bytes, nil
default:
return 0, fmt.Errorf("unknown size suffix: %v", multiple)
}
}

type wCounter struct {
n int
out io.Writer
}

func (w *wCounter) Write(p []byte) (n int, err error) {
n, err = w.out.Write(p)
w.n += n
return n, err
}
132 changes: 132 additions & 0 deletions s2/cmd/s2d/main.go
@@ -0,0 +1,132 @@
package main

import (
"bufio"
"errors"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"time"

"github.com/klauspost/compress/s2"
)

var (
safe = flag.Bool("safe", false, "Do not overwrite output files")
stdout = flag.Bool("c", false, "Write all output to stdout. Multiple input files will be concatenated.")
help = flag.Bool("help", false, "Display help")
)

func main() {
flag.Parse()
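// The Reader is reused for every input via Reset.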
r := s2.NewReader(nil)

// Print usage when no files are given or -help is set.
args := flag.Args()
if len(args) == 0 || *help {
fmt.Println(`Usage: s2d [options] file1 file2
Decompresses all files supplied as input. Input files must end with '.s2' or '.snappy'.
Output file names have the extension removed. By default output files will be overwritten.
Use - as the only file name to read from stdin and write to stdout.
Wildcards are accepted: testdir/*.s2 will decompress all files in testdir ending with .s2
Directories can be wildcards as well. testdir/*/*.s2 will match testdir/subdir/b.s2
Options:`)
flag.PrintDefaults()
os.Exit(0) // exit after showing usage
}
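// A single "-" argument: read from stdin and write to stdout.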
if len(args) == 1 && args[0] == "-" {
r.Reset(os.Stdin)
_, err := io.Copy(os.Stdout, r)
exitErr(err)
return
}
var files []string

for _, pattern := range args {
found, err := filepath.Glob(pattern)
exitErr(err)
if len(found) == 0 {
exitErr(fmt.Errorf("unable to find file %v", pattern))
}
files = append(files, found...)
}

for _, filename := range files {
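// Derive the output name by stripping the compressed extension; other files are skipped.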
dstFilename := filename
switch {
case strings.HasSuffix(filename, ".s2"):
dstFilename = strings.TrimSuffix(filename, ".s2")
case strings.HasSuffix(filename, ".snappy"):
dstFilename = strings.TrimSuffix(filename, ".snappy")
default:
fmt.Println("Skipping", filename)
continue
}

func() {
if !*stdout {
fmt.Println("Decompressing", filename, "->", dstFilename)
}
// Input file.
file, err := os.Open(filename)
exitErr(err)
defer file.Close()
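// Count the bytes read so the compression ratio can be reported.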
rc := rCounter{in: file}
src := bufio.NewReaderSize(&rc, 4<<20)
finfo, err := file.Stat()
exitErr(err)
mode := finfo.Mode() // use the same mode for the output file
if *safe {
_, err := os.Stat(dstFilename)
if !os.IsNotExist(err) {
exitErr(errors.New("destination file exists"))
}
}
var out io.Writer
if *stdout {
out = os.Stdout
} else {
dstFile, err := os.OpenFile(dstFilename, os.O_CREATE|os.O_WRONLY, mode)
exitErr(err)
defer dstFile.Close()
bw := bufio.NewWriterSize(dstFile, 4<<20)
defer bw.Flush()
out = bw
}
r.Reset(src)
start := time.Now()
output, err := io.Copy(out, r)
exitErr(err)
if !*stdout {
elapsed := time.Since(start)
mbPerSec := (float64(output) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second)))
pct := float64(output) * 100 / float64(rc.n)
fmt.Printf("%d -> %d [%.02f%%]; %dMB/s\n", rc.n, output, pct, int(mbPerSec))
}
}()
}
}

func exitErr(err error) {
if err != nil {
fmt.Fprintln(os.Stderr, "ERROR:", err.Error())
os.Exit(2)
}
}

type rCounter struct {
n int
in io.Reader
}

func (r *rCounter) Read(p []byte) (n int, err error) {
n, err = r.in.Read(p)
r.n += n
return n, err
}
