Skip to content

Commit

Permalink
upload files to S3
Browse files Browse the repository at this point in the history
Add a --upload option to upload files to s3. Upload is disabled by
default. A new --purge-remote option can be used to purge remote files.
  • Loading branch information
orgrim committed Dec 4, 2021
1 parent 245e0ca commit 07c051b
Show file tree
Hide file tree
Showing 10 changed files with 655 additions and 11 deletions.
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ oldest tested server version of PostgreSQL is 8.2.

## Usage

### Basic usage

Use the `--help` or `-?` to print the list of available options. To dump all
databases, you only need to give the proper connection options to the PostgreSQL
instance and the path to a writable directory to store the dump files.
Expand All @@ -77,6 +79,8 @@ The other command line options let you tweak what is dumped, purged, and how
it is done. These options can be put in a configuration file. The command line
options override configuration options.

### Per-database configuration

Per-database configuration can only be done with a configuration file. The
configuration file uses the `ini` format, global options are in an unspecified
section at the top of the file, and database specific options are in a section
Expand All @@ -102,12 +106,16 @@ concurrent `pg_dump` jobs greater than 1 with `--jobs` (`-j`) option. It is diff
than `--parallel-backup-jobs` (`-J`) that controls the number of sessions used by
`pg_dump` with the directory format.

### Checksums

A checksum of all output files is computed in a separate file when
`--checksum-algo` (`-S`) is different than `none`. The possible algorithms are:
`sha1`, `sha224`, `sha256`, `sha384` and `sha512`. The checksum file is in the
format required by _shaXsum_ (`sha1sum`, `sha256sum`, etc.) tools for checking
with their `-c` option.

### Purge

Older dumps can be removed based on their age with `--purge-older-than` (`-P`)
in days, if no unit is given. Allowed units are the ones understood by the
`time.ParseDuration` Go function: "s" (seconds), "m" (minutes), "h" (hours) and
Expand All @@ -120,11 +128,15 @@ avoiding file removal completely. When both `--purge-older-than` and
before old dumps are removed. This avoids removing all dumps when the time
interval is too small.

### Hooks

A command can be run before taking dumps with `--pre-backup-hook`, and after
with `--post-backup-hook`. The commands are executed directly, not by a shell,
respecting single and double quoted values. Even if some operation fails, the
post backup hook is executed when present.

### Encryption

All the files produced by a run of pg_back can be encrypted using age
(<https://age-encryption.org/> an easy to use tool that does authenticated
encryption of files). To keep things simple, encryption is done using a
Expand All @@ -144,6 +156,22 @@ the `age` tool, independently. Decryption of multiple files can be parallelized
with the `-j` option. Arguments on the commandline (database names when
dumping) are used as shell globs to choose which files to decrypt.

### Upload to remote locations

All files produced by a run can be uploaded to a remote location by setting the
`--upload` option to a value different than `none`. The possible values are `s3` or
`none`.

When set to `s3`, files are uploaded to AWS S3. The `--s3-*` family of options
can be used to tweak the access to the bucket. The `--s3-profile` option only reads
credentials and basic configuration; S3-specific options from the profile are not used.

The `--purge-remote` option can be set to `yes` to apply the same purge policy
on the remote location as the local directory.

When files are encrypted and their unencrypted source is kept, only encrypted
files are uploaded.

## Managing the configuration file

The previous v1 configuration files are not compatible with pg_back v2.
Expand Down
125 changes: 125 additions & 0 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,17 @@ type options struct {
EncryptKeepSrc bool
CipherPassphrase string
Decrypt bool

Upload string // values are none, s3
PurgeRemote bool
S3Region string
S3Bucket string
S3EndPoint string
S3Profile string
S3KeyID string
S3Secret string
S3ForcePath bool
S3DisableTLS bool
}

func defaultOptions() options {
Expand All @@ -95,6 +106,7 @@ func defaultOptions() options {
SumAlgo: "none",
CfgFile: defaultCfgFile,
TimeFormat: timeFormat,
Upload: "none",
}
}

Expand Down Expand Up @@ -158,6 +170,35 @@ func validatePurgeTimeLimitValue(i string) (time.Duration, error) {

}

// validateYesNoOption parses a boolean command line or configuration
// value. Accepted inputs are "y", "yes", "n" and "no", compared
// case-insensitively after trimming surrounding whitespace. Any other
// input yields an error.
func validateYesNoOption(s string) (bool, error) {
	switch strings.TrimSpace(strings.ToLower(s)) {
	case "y", "yes":
		return true, nil
	case "n", "no":
		return false, nil
	default:
		return false, fmt.Errorf("value must be \"yes\" or \"no\"")
	}
}

// validateEnum checks that s matches one of the allowed candidate
// values. The comparison is done case-insensitively after trimming
// surrounding whitespace from s; the candidates are expected to already
// be lowercase. It returns nil on a match, or an error listing the
// accepted values otherwise.
func validateEnum(s string, candidates []string) error {
	ls := strings.TrimSpace(strings.ToLower(s))
	// Return as soon as a candidate matches instead of scanning the
	// whole list with a flag variable.
	for _, v := range candidates {
		if v == ls {
			return nil
		}
	}
	return fmt.Errorf("value not found in %v", candidates)
}

func parseCli(args []string) (options, []string, error) {
var format, purgeKeep, purgeInterval string

Expand Down Expand Up @@ -197,6 +238,18 @@ func parseCli(args []string) (options, []string, error) {
pflag.BoolVar(&opts.Decrypt, "decrypt", false, "decrypt files in the backup directory")
pflag.StringVar(&opts.CipherPassphrase, "cipher-pass", "", "cipher passphrase for encryption and decryption\n")

pflag.StringVar(&opts.Upload, "upload", "none", "upload produced files to target (s3, gcs,..) use \"none\" to override\nconfiguration file and disable upload")
purgeRemote := pflag.String("purge-remote", "no", "purge the file on remote location after upload, with the same rules as the local directory")

pflag.StringVar(&opts.S3Region, "s3-region", "", "S3 region")
pflag.StringVar(&opts.S3Bucket, "s3-bucket", "", "S3 bucket")
pflag.StringVar(&opts.S3Profile, "s3-profile", "", "AWS client profile name to get credentials")
pflag.StringVar(&opts.S3KeyID, "s3-key-id", "", "AWS Access key ID")
pflag.StringVar(&opts.S3Secret, "s3-secret", "", "AWS Secret access key")
pflag.StringVar(&opts.S3EndPoint, "s3-endpoint", "", "S3 endpoint URI")
S3ForcePath := pflag.String("s3-force-path", "no", "force path style addressing instead of virtual hosted bucket\naddressing")
S3UseTLS := pflag.String("s3-tls", "yes", "enable or disable TLS on requests")

pflag.StringVarP(&opts.Host, "host", "h", "", "database server host or socket directory")
pflag.IntVarP(&opts.Port, "port", "p", 0, "database server port number")
pflag.StringVarP(&opts.Username, "username", "U", "", "connect as specified database user")
Expand Down Expand Up @@ -323,6 +376,33 @@ func parseCli(args []string) (options, []string, error) {
}
}

// Validate upload option
stores := []string{"none", "s3"}
if err := validateEnum(opts.Upload, stores); err != nil {
return opts, changed, fmt.Errorf("invalid value for --upload: %s", err)
}

opts.PurgeRemote, err = validateYesNoOption(*purgeRemote)
if err != nil {
return opts, changed, fmt.Errorf("invalid value for --purge-remote: %s", err)
}

// Validate S3 options
opts.S3ForcePath, err = validateYesNoOption(*S3ForcePath)
if err != nil {
return opts, changed, fmt.Errorf("invalid value for --s3-force-path: %s", err)
}

S3WithTLS, err := validateYesNoOption(*S3UseTLS)
if err != nil {
return opts, changed, fmt.Errorf("invalid value for --s3-tls: %s", err)
}
opts.S3DisableTLS = !S3WithTLS

if opts.Upload == "s3" && opts.S3Bucket == "" {
return opts, changed, fmt.Errorf("option --s3-bucket is mandatory when --upload=s3")
}

return opts, changed, nil
}

Expand Down Expand Up @@ -365,6 +445,18 @@ func loadConfigurationFile(path string) (options, error) {
opts.CipherPassphrase = s.Key("cipher_passphrase").MustString("")
opts.EncryptKeepSrc = s.Key("encrypt_keep_source").MustBool(false)

opts.Upload = s.Key("upload").MustString("none")
opts.PurgeRemote = s.Key("purge_remote").MustBool(false)

opts.S3Region = s.Key("s3_region").MustString("")
opts.S3Bucket = s.Key("s3_bucket").MustString("")
opts.S3EndPoint = s.Key("s3_endpoint").MustString("")
opts.S3Profile = s.Key("s3_profile").MustString("")
opts.S3KeyID = s.Key("s3_key_id").MustString("")
opts.S3Secret = s.Key("s3_secret").MustString("")
opts.S3ForcePath = s.Key("s3_force_path").MustBool(false)
opts.S3DisableTLS = !s.Key("s3_tls").MustBool(true)

// Validate purge keep and time limit
keep, err := validatePurgeKeepValue(purgeKeep)
if err != nil {
Expand Down Expand Up @@ -395,6 +487,16 @@ func loadConfigurationFile(path string) (options, error) {
return opts, fmt.Errorf("cannot use an empty passphrase for encryption")
}

// Validate upload option
stores := []string{"none", "s3"}
if err := validateEnum(opts.Upload, stores); err != nil {
return opts, fmt.Errorf("invalid value for upload: %s", err)
}

if opts.Upload == "s3" && opts.S3Bucket == "" {
return opts, fmt.Errorf("option s3_bucket is mandatory when upload is s3")
}

// Validate the value of the timestamp format. Force the use of legacy
// on windows to avoid failure when creating filenames with the
// timestamp
Expand Down Expand Up @@ -563,6 +665,29 @@ func mergeCliAndConfigOptions(cliOpts options, configOpts options, onCli []strin
opts.CipherPassphrase = cliOpts.CipherPassphrase
case "decrypt":
opts.Decrypt = cliOpts.Decrypt

case "upload":
opts.Upload = cliOpts.Upload
case "purge-remote":
opts.PurgeRemote = cliOpts.PurgeRemote

case "s3-region":
opts.S3Region = cliOpts.S3Region
case "s3-bucket":
opts.S3Bucket = cliOpts.S3Bucket
case "s3-profile":
opts.S3Profile = cliOpts.S3Profile
case "s3-key-id":
opts.S3KeyID = cliOpts.S3KeyID
case "s3-secret":
opts.S3Secret = cliOpts.S3Secret
case "s3-endpoint":
opts.S3EndPoint = cliOpts.S3EndPoint
case "s3-force-path":
opts.S3ForcePath = cliOpts.S3ForcePath
case "s3-tls":
opts.S3DisableTLS = cliOpts.S3DisableTLS

case "host":
opts.Host = cliOpts.Host
case "port":
Expand Down
Loading

0 comments on commit 07c051b

Please sign in to comment.