diff --git a/go/cmd/vtcombo/main.go b/go/cmd/vtcombo/main.go index 3c0078e2475..27aef62f869 100644 --- a/go/cmd/vtcombo/main.go +++ b/go/cmd/vtcombo/main.go @@ -27,6 +27,8 @@ import ( "strings" "time" + "vitess.io/vitess/go/vt/log" + "github.com/spf13/pflag" "google.golang.org/protobuf/proto" @@ -34,7 +36,7 @@ import ( "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/vt/dbconfigs" "vitess.io/vitess/go/vt/env" - "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/logutil" "vitess.io/vitess/go/vt/mysqlctl" "vitess.io/vitess/go/vt/servenv" diff --git a/go/cmd/vtorc/main.go b/go/cmd/vtorc/main.go index 47dc27f8ea0..fb7f1667180 100644 --- a/go/cmd/vtorc/main.go +++ b/go/cmd/vtorc/main.go @@ -25,15 +25,14 @@ import ( _ "github.com/mattn/go-sqlite3" "github.com/spf13/pflag" + _flag "vitess.io/vitess/go/internal/flag" + "vitess.io/vitess/go/vt/log" vtlog "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/logutil" "vitess.io/vitess/go/vt/orchestrator/app" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/inst" "vitess.io/vitess/go/vt/servenv" - - _flag "vitess.io/vitess/go/internal/flag" ) var ( @@ -127,10 +126,6 @@ func main() { sibling := fs.StringP("sibling", "s", "", "sibling instance, host_fqdn[:port]") destination := fs.StringP("destination", "d", "", "destination instance, host_fqdn[:port] (synonym to -s)") discovery := fs.Bool("discovery", true, "auto discovery mode") - quiet := fs.Bool("quiet", false, "quiet") - verbose := fs.Bool("verbose", false, "verbose") - debug := fs.Bool("debug", false, "debug mode (very verbose)") - stack := fs.Bool("stack", false, "add stack trace upon error") config.RuntimeCLIFlags.SkipUnresolve = fs.Bool("skip-unresolve", false, "Do not unresolve a host name") config.RuntimeCLIFlags.SkipUnresolveCheck = fs.Bool("skip-unresolve-check", false, "Skip/ignore checking an unresolve mapping (via hostname_unresolve table) resolves back to same hostname") config.RuntimeCLIFlags.Noop = fs.Bool("noop", false, "Dry run; do not perform destructing operations") @@ -178,17 +173,6 @@ Please update your scripts before the next version, when this will begin to brea *destination = *sibling } - log.SetLevel(log.ERROR) - if *verbose { - log.SetLevel(log.INFO) - } - if *debug { - log.SetLevel(log.DEBUG) - } - if *stack { - log.SetPrintStackTrace(*stack) - } - startText := "starting orchestrator" if AppVersion != "" { startText += ", version: " + AppVersion @@ -206,17 +190,6 @@ Please update your scripts before the next version, when this will begin to brea if *config.RuntimeCLIFlags.EnableDatabaseUpdate { config.Config.SkipOrchestratorDatabaseUpdate = false } - if config.Config.Debug { - log.SetLevel(log.DEBUG) - } - if *quiet { - // Override!! 
- log.SetLevel(log.ERROR) - } - if config.Config.EnableSyslog { - log.EnableSyslogWriter("orchestrator") - log.SetSyslogLevel(log.INFO) - } if config.Config.AuditToSyslog { inst.EnableAuditSyslog() } diff --git a/go/vt/orchestrator/app/cli.go b/go/vt/orchestrator/app/cli.go index 99eefa27ae4..4b6237c3214 100644 --- a/go/vt/orchestrator/app/cli.go +++ b/go/vt/orchestrator/app/cli.go @@ -21,13 +21,13 @@ import ( "net" "os" "os/user" - "regexp" "sort" "strings" "time" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/util" "vitess.io/vitess/go/vt/orchestrator/inst" "vitess.io/vitess/go/vt/orchestrator/logic" @@ -98,7 +98,7 @@ func getClusterName(clusterAlias string, instanceKey *inst.InstanceKey) (cluster func validateInstanceIsFound(instanceKey *inst.InstanceKey) (instance *inst.Instance) { instance, _, err := inst.ReadInstance(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } if instance == nil { log.Fatalf("Instance not found: %+v", *instanceKey) @@ -106,28 +106,6 @@ func validateInstanceIsFound(instanceKey *inst.InstanceKey) (instance *inst.Inst return instance } -// CliWrapper is called from main and allows for the instance parameter -// to take multiple instance names separated by a comma or whitespace. -func CliWrapper(command string, strict bool, instances string, destination string, owner string, reason string, duration string, pattern string, clusterAlias string, pool string, hostnameFlag string) { - if config.Config.RaftEnabled && !*config.RuntimeCLIFlags.IgnoreRaftSetup { - log.Fatalf(`Orchestrator configured to run raft ("RaftEnabled": true). All access must go through the web API of the active raft node. You may use the orchestrator-client script which has a similar interface to the command line invocation. You may override this with --ignore-raft-setup`) - } - r := regexp.MustCompile(`[ ,\r\n\t]+`) - tokens := r.Split(instances, -1) - switch command { - case "submit-pool-instances": - { - // These commands unsplit the tokens (they expect a comma delimited list of instances) - tokens = []string{instances} - } - } - for _, instance := range tokens { - if instance != "" || len(tokens) == 1 { - Cli(command, strict, instance, destination, owner, reason, duration, pattern, clusterAlias, pool, hostnameFlag) - } - } -} - // Cli initiates a command line interface, executing requested command. 
func Cli(command string, strict bool, instance string, destination string, owner string, reason string, duration string, pattern string, clusterAlias string, pool string, hostnameFlag string) { if synonym, ok := commandSynonyms[command]; ok { @@ -173,7 +151,7 @@ func Cli(command string, strict bool, instance string, destination string, owner // get os username as owner usr, err := user.Current() if err != nil { - log.Fatale(err) + log.Fatal(err) } owner = usr.Username } @@ -194,7 +172,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } _, err := inst.RelocateBelow(instanceKey, destinationKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Printf("%s<%s\n", instanceKey.DisplayString(), destinationKey.DisplayString()) } @@ -206,10 +184,10 @@ func Cli(command string, strict bool, instance string, destination string, owner } replicas, _, errs, err := inst.RelocateReplicas(instanceKey, destinationKey, pattern) if err != nil { - log.Fatale(err) + log.Fatal(err) } else { for _, e := range errs { - log.Errore(e) + log.Error(e) } for _, replica := range replicas { fmt.Println(replica.Key.DisplayString()) @@ -224,7 +202,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } _, _, err := inst.TakeSiblings(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -240,14 +218,14 @@ func Cli(command string, strict bool, instance string, destination string, owner lostReplicas = append(lostReplicas, cannotReplicateReplicas...) postponedFunctionsContainer.Wait() - if promotedReplica == nil { + if err != nil { + log.Fatal(err) + } + if promotedReplica == nil { //nolint log.Fatalf("Could not regroup replicas of %+v; error: %+v", *instanceKey, err) } fmt.Printf("%s lost: %d, trivial: %d, pseudo-gtid: %d\n", promotedReplica.Key.DisplayString(), len(lostReplicas), len(equalReplicas), len(aheadReplicas)) //nolint - if err != nil { - log.Fatale(err) - } } // General replication commands // move, binlog file:pos @@ -256,7 +234,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) instance, err := inst.MoveUp(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Printf("%s<%s\n", instanceKey.DisplayString(), instance.SourceKey.DisplayString()) } @@ -269,10 +247,10 @@ func Cli(command string, strict bool, instance string, destination string, owner movedReplicas, _, errs, err := inst.MoveUpReplicas(instanceKey, pattern) if err != nil { - log.Fatale(err) + log.Fatal(err) } else { for _, e := range errs { - log.Errore(e) + log.Error(e) } for _, replica := range movedReplicas { fmt.Println(replica.Key.DisplayString()) @@ -287,7 +265,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } _, err := inst.MoveBelow(instanceKey, destinationKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Printf("%s<%s\n", instanceKey.DisplayString(), destinationKey.DisplayString()) } @@ -297,7 +275,7 @@ func Cli(command string, strict bool, instance string, destination string, owner // destinationKey can be null, in which case the instance repoints to its existing primary instance, err := inst.Repoint(instanceKey, destinationKey, inst.GTIDHintNeutral) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Printf("%s<%s\n", instanceKey.DisplayString(), instance.SourceKey.DisplayString()) } @@ -306,10 +284,10 @@ func Cli(command string, strict bool, 
instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) repointedReplicas, errs, err := inst.RepointReplicasTo(instanceKey, pattern, destinationKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } else { for _, e := range errs { - log.Errore(e) + log.Error(e) } for _, replica := range repointedReplicas { fmt.Printf("%s<%s\n", replica.Key.DisplayString(), instanceKey.DisplayString()) @@ -324,7 +302,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } _, err := inst.TakePrimary(instanceKey, false) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -333,7 +311,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.MakeCoPrimary(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -346,7 +324,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instance, _, _, _, _, err := inst.GetCandidateReplica(instanceKey, false) if err != nil { - log.Fatale(err) + log.Fatal(err) } else { fmt.Println(instance.Key.DisplayString()) } @@ -360,13 +338,13 @@ func Cli(command string, strict bool, instance string, destination string, owner validateInstanceIsFound(instanceKey) _, promotedBinlogServer, err := inst.RegroupReplicasBinlogServers(instanceKey, false) - if promotedBinlogServer == nil { + if err != nil { + log.Fatal(err) + } + if promotedBinlogServer == nil { //nolint log.Fatalf("Could not regroup binlog server replicas of %+v; error: %+v", *instanceKey, err) } fmt.Println(promotedBinlogServer.Key.DisplayString()) //nolint - if err != nil { - log.Fatale(err) - } } // move, GTID case registerCliCommand("move-gtid", "GTID relocation", `Move a replica beneath another instance.`): @@ -377,7 +355,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } _, err := inst.MoveBelowGTID(instanceKey, destinationKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Printf("%s<%s\n", instanceKey.DisplayString(), destinationKey.DisplayString()) } @@ -389,10 +367,10 @@ func Cli(command string, strict bool, instance string, destination string, owner } movedReplicas, _, errs, err := inst.MoveReplicasGTID(instanceKey, destinationKey, pattern) if err != nil { - log.Fatale(err) + log.Fatal(err) } else { for _, e := range errs { - log.Errore(e) + log.Error(e) } for _, replica := range movedReplicas { fmt.Println(replica.Key.DisplayString()) @@ -410,13 +388,13 @@ func Cli(command string, strict bool, instance string, destination string, owner lostReplicas, movedReplicas, cannotReplicateReplicas, promotedReplica, err := inst.RegroupReplicasGTID(instanceKey, false, func(candidateReplica *inst.Instance) { fmt.Println(candidateReplica.Key.DisplayString()) }, postponedFunctionsContainer, nil) lostReplicas = append(lostReplicas, cannotReplicateReplicas...) 
- if promotedReplica == nil { + if err != nil { + log.Fatal(err) + } + if promotedReplica == nil { //nolint log.Fatalf("Could not regroup replicas of %+v; error: %+v", *instanceKey, err) } fmt.Printf("%s lost: %d, moved: %d\n", promotedReplica.Key.DisplayString(), len(lostReplicas), len(movedReplicas)) //nolint - if err != nil { - log.Fatale(err) - } } // General replication commands case registerCliCommand("enable-gtid", "Replication, general", `If possible, turn on GTID replication`): @@ -424,7 +402,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.EnableGTID(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -433,7 +411,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.DisableGTID(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -443,9 +421,9 @@ func Cli(command string, strict bool, instance string, destination string, owner instance, err := inst.ReadTopologyInstance(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } - if instance == nil { + if instance == nil { //nolint log.Fatalf("Instance not found: %+v", *instanceKey) } fmt.Println(instance.GtidErrant) //nolint @@ -455,7 +433,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.ErrantGTIDResetPrimary(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -464,7 +442,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.SkipQuery(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -473,7 +451,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.StopReplication(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -482,7 +460,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.StartReplication(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -491,7 +469,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.RestartReplication(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -500,7 +478,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.ResetReplicationOperation(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -512,7 +490,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } _, err := inst.DetachReplicaPrimaryHost(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) 
} fmt.Println(instanceKey.DisplayString()) } @@ -524,7 +502,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } _, err := inst.ReattachReplicaPrimaryHost(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -536,7 +514,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } instance, err := inst.ReadTopologyInstance(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } if instance == nil { log.Fatalf("Instance not found: %+v", *instanceKey) @@ -548,7 +526,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } _, err = inst.PrimaryPosWait(instanceKey, binlogCoordinates) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -560,7 +538,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } statements, err := inst.GetReplicationRestartPreserveStatements(instanceKey, *config.RuntimeCLIFlags.Statement) if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, statement := range statements { fmt.Println(statement) @@ -611,7 +589,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.SetReadOnly(instanceKey, true) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -620,7 +598,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.SetReadOnly(instanceKey, false) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -635,7 +613,7 @@ func Cli(command string, strict bool, instance string, destination string, owner _, err = inst.FlushBinaryLogsTo(instanceKey, *config.RuntimeCLIFlags.BinlogFile) } if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -649,7 +627,7 @@ func Cli(command string, strict bool, instance string, destination string, owner _, err = inst.PurgeBinaryLogsTo(instanceKey, *config.RuntimeCLIFlags.BinlogFile, false) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -661,7 +639,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } errantBinlogs, err := inst.LocateErrantGTID(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, binlog := range errantBinlogs { fmt.Println(binlog) @@ -675,14 +653,14 @@ func Cli(command string, strict bool, instance string, destination string, owner } err := inst.ApplyPoolInstances(inst.NewPoolInstancesSubmission(pool, instance)) if err != nil { - log.Fatale(err) + log.Fatal(err) } } case registerCliCommand("cluster-pool-instances", "Pools", `List all pools and their associated instances`): { clusterPoolInstances, err := inst.ReadAllClusterPoolInstances() if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, clusterPoolInstance := range clusterPoolInstances { fmt.Printf("%s\t%s\t%s\t%s:%d\n", clusterPoolInstance.ClusterName, clusterPoolInstance.ClusterAlias, clusterPoolInstance.Pool, clusterPoolInstance.Hostname, clusterPoolInstance.Port) @@ -694,7 +672,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instances, err := inst.GetHeuristicClusterPoolInstances(clusterName, pool) if err != nil { - 
log.Fatale(err) + log.Fatal(err) } else { for _, instance := range instances { fmt.Println(instance.Key.DisplayString()) @@ -709,7 +687,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } instances, err := inst.FindInstances(pattern) if err != nil { - log.Fatale(err) + log.Fatal(err) } else { for _, instance := range instances { fmt.Println(instance.Key.DisplayString()) @@ -723,7 +701,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } instances, err := inst.SearchInstances(pattern) if err != nil { - log.Fatale(err) + log.Fatal(err) } else { for _, instance := range instances { fmt.Println(instance.Key.DisplayString()) @@ -734,7 +712,7 @@ func Cli(command string, strict bool, instance string, destination string, owner { clusters, err := inst.ReadClusters() if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(strings.Join(clusters, "\n")) } @@ -742,7 +720,7 @@ func Cli(command string, strict bool, instance string, destination string, owner { clusters, err := inst.ReadClustersInfo("") if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, cluster := range clusters { fmt.Printf("%s\t%s\n", cluster.ClusterName, cluster.ClusterAlias) @@ -752,7 +730,7 @@ func Cli(command string, strict bool, instance string, destination string, owner { instances, err := inst.ReadWriteableClustersPrimaries() if err != nil { - log.Fatale(err) + log.Fatal(err) } else { for _, instance := range instances { fmt.Println(instance.Key.DisplayString()) @@ -764,7 +742,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) output, err := inst.ASCIITopology(clusterName, pattern, false, false) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(output) } @@ -773,7 +751,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) output, err := inst.ASCIITopology(clusterName, pattern, true, false) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(output) } @@ -782,7 +760,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) output, err := inst.ASCIITopology(clusterName, pattern, false, true) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(output) } @@ -790,7 +768,7 @@ func Cli(command string, strict bool, instance string, destination string, owner { instances, err := inst.SearchInstances("") if err != nil { - log.Fatale(err) + log.Fatal(err) } else { for _, instance := range instances { fmt.Println(instance.Key.DisplayString()) @@ -816,7 +794,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) clusterInfo, err := inst.ReadClusterInfo(clusterName) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(clusterInfo.ClusterAlias) } @@ -825,7 +803,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) clusterInfo, err := inst.ReadClusterInfo(clusterName) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(clusterInfo.ClusterDomain) } @@ -834,7 +812,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) instanceKey, err := 
inst.GetHeuristicClusterDomainInstanceAttribute(clusterName) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -843,7 +821,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) primaries, err := inst.ReadClusterPrimary(clusterName) if err != nil { - log.Fatale(err) + log.Fatal(err) } if len(primaries) == 0 { log.Fatalf("No writeable primaries found for cluster %+v", clusterName) @@ -855,7 +833,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) instances, err := inst.ReadClusterInstances(clusterName) if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, clusterInstance := range instances { fmt.Println(clusterInstance.Key.DisplayString()) @@ -866,7 +844,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) instances, err := inst.GetClusterOSCReplicas(clusterName) if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, clusterInstance := range instances { fmt.Println(clusterInstance.Key.DisplayString()) @@ -877,7 +855,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) instances, err := inst.GetClusterGhostReplicas(clusterName) if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, clusterInstance := range instances { fmt.Println(clusterInstance.Key.DisplayString()) @@ -899,7 +877,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) instances, err := inst.ReadDowntimedInstances(clusterName) if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, clusterInstance := range instances { fmt.Println(clusterInstance.Key.DisplayString()) @@ -913,7 +891,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } replicas, err := inst.ReadReplicaInstances(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, replica := range replicas { fmt.Println(replica.Key.DisplayString()) @@ -923,7 +901,7 @@ func Cli(command string, strict bool, instance string, destination string, owner { instances, err := inst.ReadLostInRecoveryInstances("") if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, instance := range instances { fmt.Println(instance.Key.DisplayString()) @@ -943,7 +921,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) lag, err := inst.GetClusterHeuristicLag(clusterName) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(lag) } @@ -952,7 +930,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) tags, err := inst.ReadInstanceTags(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, tag := range tags { fmt.Println(tag.String()) @@ -963,12 +941,12 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) tag, err := inst.ParseTag(*config.RuntimeCLIFlags.Tag) if err != nil { - log.Fatale(err) + log.Fatal(err) } tagExists, err := inst.ReadInstanceTag(instanceKey, tag) if err != nil { - log.Fatale(err) + log.Fatal(err) } if 
tagExists { fmt.Println(tag.TagValue) @@ -979,7 +957,7 @@ func Cli(command string, strict bool, instance string, destination string, owner tagsString := *config.RuntimeCLIFlags.Tag instanceKeyMap, err := inst.GetInstanceKeysByTags(tagsString) if err != nil { - log.Fatale(err) + log.Fatal(err) } keysDisplayStrings := []string{} for _, key := range instanceKeyMap.GetInstanceKeys() { @@ -995,9 +973,9 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) tag, err := inst.ParseTag(*config.RuntimeCLIFlags.Tag) if err != nil { - log.Fatale(err) + log.Fatal(err) } - inst.PutInstanceTag(instanceKey, tag) + _ = inst.PutInstanceTag(instanceKey, tag) fmt.Println(instanceKey.DisplayString()) } case registerCliCommand("untag", "tags", `Remove a tag from an instance`): @@ -1005,11 +983,11 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) tag, err := inst.ParseTag(*config.RuntimeCLIFlags.Tag) if err != nil { - log.Fatale(err) + log.Fatal(err) } untagged, err := inst.Untag(instanceKey, tag) if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, key := range untagged.GetInstanceKeys() { fmt.Println(key.DisplayString()) @@ -1019,11 +997,11 @@ func Cli(command string, strict bool, instance string, destination string, owner { tag, err := inst.ParseTag(*config.RuntimeCLIFlags.Tag) if err != nil { - log.Fatale(err) + log.Fatal(err) } untagged, err := inst.Untag(nil, tag) if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, key := range untagged.GetInstanceKeys() { fmt.Println(key.DisplayString()) @@ -1041,19 +1019,19 @@ func Cli(command string, strict bool, instance string, destination string, owner } instance, err := inst.ReadTopologyInstance(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instance.Key.DisplayString()) } case registerCliCommand("forget", "Instance management", `Forget about an instance's existence`): { - if rawInstanceKey == nil { + if rawInstanceKey == nil { //nolint log.Fatal("Cannot deduce instance:", instance) } instanceKey, _ = inst.FigureInstanceKey(rawInstanceKey, nil) err := inst.ForgetInstance(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -1067,7 +1045,7 @@ func Cli(command string, strict bool, instance string, destination string, owner if duration != "" { durationSeconds, err = util.SimpleTimeToSeconds(duration) if err != nil { - log.Fatale(err) + log.Fatal(err) } if durationSeconds < 0 { log.Fatalf("Duration value must be non-negative. 
Given value: %d", durationSeconds) @@ -1079,7 +1057,7 @@ func Cli(command string, strict bool, instance string, destination string, owner log.Infof("Maintenance duration: %d seconds", durationSeconds) } if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -1088,7 +1066,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.EndMaintenanceByInstanceKey(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -1097,7 +1075,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) inMaintenance, err := inst.InMaintenance(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } if inMaintenance { fmt.Println(instanceKey.DisplayString()) @@ -1113,7 +1091,7 @@ func Cli(command string, strict bool, instance string, destination string, owner if duration != "" { durationSeconds, err = util.SimpleTimeToSeconds(duration) if err != nil { - log.Fatale(err) + log.Fatal(err) } if durationSeconds < 0 { log.Fatalf("Duration value must be non-negative. Given value: %d", durationSeconds) @@ -1124,7 +1102,7 @@ func Cli(command string, strict bool, instance string, destination string, owner if err == nil { log.Infof("Downtime duration: %d seconds", durationSeconds) } else { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -1133,7 +1111,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.EndDowntime(instanceKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -1147,7 +1125,7 @@ func Cli(command string, strict bool, instance string, destination string, owner recoveryAttempted, promotedInstanceKey, err := logic.CheckAndRecover(instanceKey, destinationKey, (command == "recover-lite")) if err != nil { - log.Fatale(err) + log.Fatal(err) } if recoveryAttempted { if promotedInstanceKey == nil { @@ -1161,7 +1139,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) topologyRecovery, err := logic.ForcePrimaryFailover(clusterName) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(topologyRecovery.SuccessorKey.DisplayString()) } @@ -1174,7 +1152,7 @@ func Cli(command string, strict bool, instance string, destination string, owner destination := validateInstanceIsFound(destinationKey) topologyRecovery, err := logic.ForcePrimaryTakeover(clusterName, destination) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(topologyRecovery.SuccessorKey.DisplayString()) } @@ -1186,10 +1164,10 @@ func Cli(command string, strict bool, instance string, destination string, owner } topologyRecovery, err := logic.GracefulPrimaryTakeover(clusterName, destinationKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(topologyRecovery.SuccessorKey.DisplayString()) - log.Debugf("Promoted %+v as new primary.", topologyRecovery.SuccessorKey) + log.Infof("Promoted %+v as new primary.", topologyRecovery.SuccessorKey) } case registerCliCommand("graceful-primary-takeover-auto", "Recovery", `Gracefully promote a new primary. 
orchestrator will attempt to pick the promoted replica automatically`): { @@ -1201,16 +1179,16 @@ func Cli(command string, strict bool, instance string, destination string, owner } topologyRecovery, err := logic.GracefulPrimaryTakeover(clusterName, destinationKey) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(topologyRecovery.SuccessorKey.DisplayString()) - log.Debugf("Promoted %+v as new primary.", topologyRecovery.SuccessorKey) + log.Infof("Promoted %+v as new primary.", topologyRecovery.SuccessorKey) } case registerCliCommand("replication-analysis", "Recovery", `Request an analysis of potential crash incidents in all known topologies`): { analysis, err := inst.GetReplicationAnalysis("", &inst.ReplicationAnalysisHints{}) if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, entry := range analysis { fmt.Printf("%s (cluster %s): %s\n", entry.AnalyzedInstanceKey.DisplayString(), entry.ClusterDetails.ClusterName, entry.AnalysisString()) @@ -1223,7 +1201,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } countRecoveries, err := logic.AcknowledgeAllRecoveries(inst.GetMaintenanceOwner(), reason) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Printf("%d recoveries acknowldged\n", countRecoveries) } @@ -1235,7 +1213,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) countRecoveries, err := logic.AcknowledgeClusterRecoveries(clusterName, inst.GetMaintenanceOwner(), reason) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Printf("%d recoveries acknowldged\n", countRecoveries) } @@ -1248,7 +1226,7 @@ func Cli(command string, strict bool, instance string, destination string, owner countRecoveries, err := logic.AcknowledgeInstanceRecoveries(instanceKey, inst.GetMaintenanceOwner(), reason) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Printf("%d recoveries acknowldged\n", countRecoveries) } @@ -1258,11 +1236,11 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) promotionRule, err := promotionrule.Parse(*config.RuntimeCLIFlags.PromotionRule) if err != nil { - log.Fatale(err) + log.Fatal(err) } err = inst.RegisterCandidateInstance(inst.NewCandidateDatabaseInstance(instanceKey, promotionRule).WithCurrentTime()) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -1271,7 +1249,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) err := inst.RegisterHostnameUnresolve(inst.NewHostnameRegistration(instanceKey, hostnameFlag)) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -1280,7 +1258,7 @@ func Cli(command string, strict bool, instance string, destination string, owner instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) err := inst.RegisterHostnameUnresolve(inst.NewHostnameDeregistration(instanceKey)) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(instanceKey.DisplayString()) } @@ -1289,7 +1267,7 @@ func Cli(command string, strict bool, instance string, destination string, owner clusterName := getClusterName(clusterAlias, instanceKey) instanceKey, err := inst.HeuristicallyApplyClusterDomainInstanceAttribute(clusterName) if err != nil { - log.Fatale(err) + log.Fatal(err) } 
fmt.Println(instanceKey.DisplayString()) } @@ -1299,7 +1277,7 @@ func Cli(command string, strict bool, instance string, destination string, owner { err := inst.SnapshotTopologies() if err != nil { - log.Fatale(err) + log.Fatal(err) } } case registerCliCommand("continuous", "Meta", `Enter continuous mode, and actively poll for instances, diagnose problems, do maintenance`): @@ -1310,7 +1288,7 @@ func Cli(command string, strict bool, instance string, destination string, owner { nodes, err := process.ReadAvailableNodes(false) if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, node := range nodes { fmt.Println(node) @@ -1320,34 +1298,34 @@ func Cli(command string, strict bool, instance string, destination string, owner { publicToken, err := process.GenerateAccessToken(owner) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(publicToken) } case registerCliCommand("resolve", "Meta", `Resolve given hostname`): { - if rawInstanceKey == nil { + if rawInstanceKey == nil { //nolint log.Fatal("Cannot deduce instance:", instance) } if conn, err := net.Dial("tcp", rawInstanceKey.DisplayString()); err == nil { - log.Debugf("tcp test is good; got connection %+v", conn) - conn.Close() + log.Infof("tcp test is good; got connection %+v", conn) + _ = conn.Close() } else { - log.Fatale(err) + log.Fatal(err) } if cname, err := inst.GetCNAME(rawInstanceKey.Hostname); err == nil { //nolint - log.Debugf("GetCNAME() %+v, %+v", cname, err) + log.Infof("GetCNAME() %+v, %+v", cname, err) rawInstanceKey.Hostname = cname fmt.Println(rawInstanceKey.DisplayString()) } else { - log.Fatale(err) + log.Fatal(err) } } case registerCliCommand("reset-hostname-resolve-cache", "Meta", `Clear the hostname resolve cache`): { err := inst.ResetHostnameResolveCache() if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println("hostname resolve cache cleared") } @@ -1360,7 +1338,7 @@ func Cli(command string, strict bool, instance string, destination string, owner { resolves, err := inst.ReadAllHostnameResolves() if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, r := range resolves { fmt.Println(r) @@ -1370,7 +1348,7 @@ func Cli(command string, strict bool, instance string, destination string, owner { unresolves, err := inst.ReadAllHostnameUnresolves() if err != nil { - log.Fatale(err) + log.Fatal(err) } for _, r := range unresolves { fmt.Println(r) @@ -1381,7 +1359,7 @@ func Cli(command string, strict bool, instance string, destination string, owner config.RuntimeCLIFlags.ConfiguredVersion = "" _, err := inst.ReadClusters() if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println("Redeployed internal db") } @@ -1390,7 +1368,7 @@ func Cli(command string, strict bool, instance string, destination string, owner destination := validateInstanceIsFound(destinationKey) replacement, _, err := logic.SuggestReplacementForPromotedReplica(&logic.TopologyRecovery{}, instanceKey, destination, nil) if err != nil { - log.Fatale(err) + log.Fatal(err) } fmt.Println(replacement.Key.DisplayString()) } @@ -1447,7 +1425,7 @@ func Cli(command string, strict bool, instance string, destination string, owner // Help case "help": { - fmt.Fprint(os.Stderr, availableCommandsUsage()) + _, _ = fmt.Fprint(os.Stderr, availableCommandsUsage()) } default: log.Fatalf("Unknown command: \"%s\". 
%s", command, availableCommandsUsage()) diff --git a/go/vt/orchestrator/app/cli_test.go b/go/vt/orchestrator/app/cli_test.go index a527c855d68..6df8dd8a49e 100644 --- a/go/vt/orchestrator/app/cli_test.go +++ b/go/vt/orchestrator/app/cli_test.go @@ -4,14 +4,12 @@ import ( "testing" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests" ) func init() { config.Config.HostnameResolveMethod = "none" config.MarkConfigurationLoaded() - log.SetLevel(log.ERROR) } func TestKnownCommands(t *testing.T) { diff --git a/go/vt/orchestrator/app/http.go b/go/vt/orchestrator/app/http.go index 12e09b83e8b..bc9a7d256cf 100644 --- a/go/vt/orchestrator/app/http.go +++ b/go/vt/orchestrator/app/http.go @@ -24,6 +24,8 @@ import ( "strings" "time" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/collection" "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/http" @@ -36,8 +38,6 @@ import ( "github.com/martini-contrib/auth" "github.com/martini-contrib/gzip" "github.com/martini-contrib/render" - - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" ) const discoveryMetricsName = "DISCOVERY_METRICS" @@ -142,29 +142,29 @@ func standardHTTP(continuousDiscovery bool) { log.Infof("Starting HTTP listener on unix socket %v", config.Config.ListenSocket) unixListener, err := net.Listen("unix", config.Config.ListenSocket) if err != nil { - log.Fatale(err) + log.Fatal(err) } defer unixListener.Close() if err := nethttp.Serve(unixListener, m); err != nil { - log.Fatale(err) + log.Fatal(err) } } else if config.Config.UseSSL { log.Info("Starting HTTPS listener") tlsConfig, err := ssl.NewTLSConfig(config.Config.SSLCAFile, config.Config.UseMutualTLS) if err != nil { - log.Fatale(err) + log.Fatal(err) } tlsConfig.InsecureSkipVerify = config.Config.SSLSkipVerify if err = ssl.AppendKeyPairWithPassword(tlsConfig, config.Config.SSLCertFile, config.Config.SSLPrivateKeyFile, sslPEMPassword); err != nil { - log.Fatale(err) + log.Fatal(err) } if err = ssl.ListenAndServeTLS(config.Config.ListenAddress, m, tlsConfig); err != nil { - log.Fatale(err) + log.Fatal(err) } } else { log.Infof("Starting HTTP listener on %+v", config.Config.ListenAddress) if err := nethttp.ListenAndServe(config.Config.ListenAddress, m); err != nil { - log.Fatale(err) + log.Fatal(err) } } log.Info("Web server started") diff --git a/go/vt/orchestrator/attributes/attributes_dao.go b/go/vt/orchestrator/attributes/attributes_dao.go index 03f70e7ce9b..61bcb4920e4 100644 --- a/go/vt/orchestrator/attributes/attributes_dao.go +++ b/go/vt/orchestrator/attributes/attributes_dao.go @@ -18,10 +18,10 @@ package attributes import ( "fmt" - "strings" + + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) @@ -40,7 +40,8 @@ func SetHostAttributes(hostname string, attributeName string, attributeValue str attributeValue, ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } return err @@ -75,36 +76,11 @@ func getHostAttributesByClause(whereClause string, args []any) ([]HostAttributes }) if err != nil { - log.Errore(err) + log.Error(err) } return res, err } -// GetHostAttributesByMatch -func GetHostAttributesByMatch(hostnameMatch string, attributeNameMatch string, attributeValueMatch string) ([]HostAttributes, error) { - terms := []string{} - args := 
sqlutils.Args() - if hostnameMatch != "" { - terms = append(terms, ` hostname rlike ? `) - args = append(args, hostnameMatch) - } - if attributeNameMatch != "" { - terms = append(terms, ` attribute_name rlike ? `) - args = append(args, attributeNameMatch) - } - if attributeValueMatch != "" { - terms = append(terms, ` attribute_value rlike ? `) - args = append(args, attributeValueMatch) - } - - if len(terms) == 0 { - return getHostAttributesByClause("", args) - } - whereCondition := fmt.Sprintf(" where %s ", strings.Join(terms, " and ")) - - return getHostAttributesByClause(whereCondition, args) -} - // GetHostAttribute expects to return a single attribute for a given hostname/attribute-name combination // or error on empty result func GetHostAttribute(hostname string, attributeName string) (string, error) { @@ -114,7 +90,8 @@ func GetHostAttribute(hostname string, attributeName string) (string, error) { return "", err } if len(attributeName) == 0 { - return "", log.Errorf("No attribute found for %+v, %+v", hostname, attributeName) + log.Errorf("No attribute found for %+v, %+v", hostname, attributeName) + return "", fmt.Errorf("No attribute found for %+v, %+v", hostname, attributeName) } return attributes[0].AttributeValue, nil } @@ -131,13 +108,3 @@ func SetGeneralAttribute(attributeName string, attributeValue string) error { func GetGeneralAttribute(attributeName string) (result string, err error) { return GetHostAttribute("*", attributeName) } - -// GetHostAttributesByAttribute -func GetHostAttributesByAttribute(attributeName string, valueMatch string) ([]HostAttributes, error) { - if valueMatch == "" { - valueMatch = ".?" - } - whereClause := ` where attribute_name = ? and attribute_value rlike ?` - - return getHostAttributesByClause(whereClause, sqlutils.Args(attributeName, valueMatch)) -} diff --git a/go/vt/orchestrator/collection/collection.go b/go/vt/orchestrator/collection/collection.go index 5098bd5d569..6c83bf33205 100644 --- a/go/vt/orchestrator/collection/collection.go +++ b/go/vt/orchestrator/collection/collection.go @@ -64,7 +64,7 @@ import ( "sync" "time" - // "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + // "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/orchestrator/config" ) @@ -174,7 +174,7 @@ func (c *Collection) StartAutoExpiration() { c.monitoring = true c.Unlock() - // log.Infof("StartAutoExpiration: %p with expirePeriod: %v", c, c.expirePeriod) + //log.Infof("StartAutoExpiration: %p with expirePeriod: %v", c, c.expirePeriod) ticker := time.NewTicker(defaultExpireTickerPeriod) for { diff --git a/go/vt/orchestrator/config/config.go b/go/vt/orchestrator/config/config.go index c69ee363d3a..4833f609069 100644 --- a/go/vt/orchestrator/config/config.go +++ b/go/vt/orchestrator/config/config.go @@ -24,9 +24,9 @@ import ( "regexp" "strings" - "gopkg.in/gcfg.v1" + "vitess.io/vitess/go/vt/log" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "gopkg.in/gcfg.v1" ) var ( @@ -393,7 +393,7 @@ func (config *Configuration) postReadAdjustments() error { if err != nil { log.Fatalf("Failed to parse gcfg data from file: %+v", err) } else { - log.Debugf("Parsed orchestrator credentials from %s", config.MySQLOrchestratorCredentialsConfigFile) + log.Infof("Parsed orchestrator credentials from %s", config.MySQLOrchestratorCredentialsConfigFile) config.MySQLOrchestratorUser = mySQLConfig.Client.User config.MySQLOrchestratorPassword = mySQLConfig.Client.Password } @@ -417,7 +417,7 @@ func (config *Configuration) postReadAdjustments() error { if err != nil { 
log.Fatalf("Failed to parse gcfg data from file: %+v", err) } else { - log.Debugf("Parsed topology credentials from %s", config.MySQLTopologyCredentialsConfigFile) + log.Infof("Parsed topology credentials from %s", config.MySQLTopologyCredentialsConfigFile) config.MySQLTopologyUser = mySQLConfig.Client.User config.MySQLTopologyPassword = mySQLConfig.Client.Password } @@ -514,7 +514,7 @@ func read(fileName string) (*Configuration, error) { log.Fatal("Cannot read config file:", fileName, err) } if err := Config.postReadAdjustments(); err != nil { - log.Fatale(err) + log.Fatal(err) } return Config, err } diff --git a/go/vt/orchestrator/config/config_test.go b/go/vt/orchestrator/config/config_test.go index 0a91701c1f9..b9da3bc128d 100644 --- a/go/vt/orchestrator/config/config_test.go +++ b/go/vt/orchestrator/config/config_test.go @@ -3,13 +3,11 @@ package config import ( "testing" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests" ) func init() { Config.HostnameResolveMethod = "none" - log.SetLevel(log.ERROR) } func TestRecoveryPeriodBlock(t *testing.T) { diff --git a/go/vt/orchestrator/db/db.go b/go/vt/orchestrator/db/db.go index 878bce9242f..5cb033adace 100644 --- a/go/vt/orchestrator/db/db.go +++ b/go/vt/orchestrator/db/db.go @@ -23,8 +23,8 @@ import ( "sync" "time" + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) @@ -145,7 +145,7 @@ func OpenOrchestrator() (db *sql.DB, err error) { if IsSQLite() { db, fromCache, err = sqlutils.GetSQLiteDB(config.Config.SQLite3DataFile) if err == nil && !fromCache { - log.Debugf("Connected to orchestrator backend: sqlite on %v", config.Config.SQLite3DataFile) + log.Infof("Connected to orchestrator backend: sqlite on %v", config.Config.SQLite3DataFile) } if db != nil { db.SetMaxOpenConns(1) @@ -153,12 +153,14 @@ func OpenOrchestrator() (db *sql.DB, err error) { } } else { if db, fromCache, err := openOrchestratorMySQLGeneric(); err != nil { - return db, log.Errore(err) + log.Errorf(err.Error()) + return db, err } else if !fromCache { // first time ever we talk to MySQL query := fmt.Sprintf("create database if not exists %s", config.Config.MySQLOrchestratorDatabase) if _, err := db.Exec(query); err != nil { - return db, log.Errore(err) + log.Errorf(err.Error()) + return db, err } } db, fromCache, err = sqlutils.GetDB(getMySQLURI()) @@ -166,9 +168,9 @@ func OpenOrchestrator() (db *sql.DB, err error) { // do not show the password but do show what we connect to. 
safeMySQLURI := fmt.Sprintf("%s:?@tcp(%s:%d)/%s?timeout=%ds", config.Config.MySQLOrchestratorUser, config.Config.MySQLOrchestratorHost, config.Config.MySQLOrchestratorPort, config.Config.MySQLOrchestratorDatabase, config.Config.MySQLConnectTimeoutSeconds) - log.Debugf("Connected to orchestrator backend: %v", safeMySQLURI) + log.Infof("Connected to orchestrator backend: %v", safeMySQLURI) if config.Config.MySQLOrchestratorMaxPoolConnections > 0 { - log.Debugf("Orchestrator pool SetMaxOpenConns: %d", config.Config.MySQLOrchestratorMaxPoolConnections) + log.Infof("Orchestrator pool SetMaxOpenConns: %d", config.Config.MySQLOrchestratorMaxPoolConnections) db.SetMaxOpenConns(config.Config.MySQLOrchestratorMaxPoolConnections) } if config.Config.MySQLConnectionLifetimeSeconds > 0 { @@ -239,7 +241,7 @@ func registerOrchestratorDeployment(db *sql.DB) error { if _, err := execInternal(db, query, config.RuntimeCLIFlags.ConfiguredVersion); err != nil { log.Fatalf("Unable to write to orchestrator_metadata: %+v", err) } - log.Debugf("Migrated database schema to version [%+v]", config.RuntimeCLIFlags.ConfiguredVersion) + log.Infof("Migrated database schema to version [%+v]", config.RuntimeCLIFlags.ConfiguredVersion) return nil } @@ -248,7 +250,7 @@ func registerOrchestratorDeployment(db *sql.DB) error { func deployStatements(db *sql.DB, queries []string) error { tx, err := db.Begin() if err != nil { - log.Fatale(err) + log.Fatal(err.Error()) } // Ugly workaround ahead. // Origin of this workaround is the existence of some "timestamp NOT NULL," column definitions, @@ -262,23 +264,26 @@ func deployStatements(db *sql.DB, queries []string) error { if config.Config.IsMySQL() { _ = tx.QueryRow(`select @@session.sql_mode`).Scan(&originalSQLMode) if _, err := tx.Exec(`set @@session.sql_mode=REPLACE(@@session.sql_mode, 'NO_ZERO_DATE', '')`); err != nil { - log.Fatale(err) + log.Fatal(err.Error()) } if _, err := tx.Exec(`set @@session.sql_mode=REPLACE(@@session.sql_mode, 'NO_ZERO_IN_DATE', '')`); err != nil { - log.Fatale(err) + log.Fatal(err.Error()) } } for _, query := range queries { query, err := translateStatement(query) if err != nil { - return log.Fatalf("Cannot initiate orchestrator: %+v; query=%+v", err, query) + log.Fatalf("Cannot initiate orchestrator: %+v; query=%+v", err, query) + return err } if _, err := tx.Exec(query); err != nil { if strings.Contains(err.Error(), "syntax error") { - return log.Fatalf("Cannot initiate orchestrator: %+v; query=%+v", err, query) + log.Fatalf("Cannot initiate orchestrator: %+v; query=%+v", err, query) + return err } if !sqlutils.IsAlterTable(query) && !sqlutils.IsCreateIndex(query) && !sqlutils.IsDropIndex(query) { - return log.Fatalf("Cannot initiate orchestrator: %+v; query=%+v", err, query) + log.Fatalf("Cannot initiate orchestrator: %+v; query=%+v", err, query) + return err } if !strings.Contains(err.Error(), "duplicate column name") && !strings.Contains(err.Error(), "Duplicate column name") && @@ -291,11 +296,11 @@ func deployStatements(db *sql.DB, queries []string) error { } if config.Config.IsMySQL() { if _, err := tx.Exec(`set session sql_mode=?`, originalSQLMode); err != nil { - log.Fatale(err) + log.Fatal(err.Error()) } } if err := tx.Commit(); err != nil { - log.Fatale(err) + log.Fatal(err.Error()) } return nil } @@ -303,7 +308,7 @@ func deployStatements(db *sql.DB, queries []string) error { // initOrchestratorDB attempts to create/upgrade the orchestrator backend database. It is created once in the // application's lifetime. 
func initOrchestratorDB(db *sql.DB) error { - log.Debug("Initializing orchestrator") + log.Info("Initializing orchestrator") versionAlreadyDeployed, err := versionIsDeployed(db) if versionAlreadyDeployed && config.RuntimeCLIFlags.ConfiguredVersion != "" && err == nil { @@ -313,7 +318,7 @@ func initOrchestratorDB(db *sql.DB) error { if config.Config.PanicIfDifferentDatabaseDeploy && config.RuntimeCLIFlags.ConfiguredVersion != "" && !versionAlreadyDeployed { log.Fatalf("PanicIfDifferentDatabaseDeploy is set. Configured version %s is not the version found in the database", config.RuntimeCLIFlags.ConfiguredVersion) } - log.Debugf("Migrating database schema") + log.Info("Migrating database schema") deployStatements(db, generateSQLBase) deployStatements(db, generateSQLPatches) registerOrchestratorDeployment(db) @@ -356,7 +361,8 @@ func ExecOrchestrator(query string, args ...any) (sql.Result, error) { func QueryOrchestratorRowsMap(query string, onRow func(sqlutils.RowMap) error) error { query, err := translateStatement(query) if err != nil { - return log.Fatalf("Cannot query orchestrator: %+v; query=%+v", err, query) + log.Fatalf("Cannot query orchestrator: %+v; query=%+v", err, query) + return err } db, err := OpenOrchestrator() if err != nil { @@ -370,21 +376,27 @@ func QueryOrchestratorRowsMap(query string, onRow func(sqlutils.RowMap) error) e func QueryOrchestrator(query string, argsArray []any, onRow func(sqlutils.RowMap) error) error { query, err := translateStatement(query) if err != nil { - return log.Fatalf("Cannot query orchestrator: %+v; query=%+v", err, query) + log.Fatalf("Cannot query orchestrator: %+v; query=%+v", err, query) + return err } db, err := OpenOrchestrator() if err != nil { return err } - return log.Criticale(sqlutils.QueryRowsMap(db, query, onRow, argsArray...)) + if err = sqlutils.QueryRowsMap(db, query, onRow, argsArray...); err != nil { + log.Warning(err.Error()) + } + + return err } // QueryOrchestratorRowsMapBuffered func QueryOrchestratorRowsMapBuffered(query string, onRow func(sqlutils.RowMap) error) error { query, err := translateStatement(query) if err != nil { - return log.Fatalf("Cannot query orchestrator: %+v; query=%+v", err, query) + log.Fatalf("Cannot query orchestrator: %+v; query=%+v", err, query) + return err } db, err := OpenOrchestrator() if err != nil { @@ -398,7 +410,8 @@ func QueryOrchestratorRowsMapBuffered(query string, onRow func(sqlutils.RowMap) func QueryOrchestratorBuffered(query string, argsArray []any, onRow func(sqlutils.RowMap) error) error { query, err := translateStatement(query) if err != nil { - return log.Fatalf("Cannot query orchestrator: %+v; query=%+v", err, query) + log.Fatalf("Cannot query orchestrator: %+v; query=%+v", err, query) + return err } db, err := OpenOrchestrator() if err != nil { @@ -408,7 +421,10 @@ func QueryOrchestratorBuffered(query string, argsArray []any, onRow func(sqlutil if argsArray == nil { argsArray = EmptyArgs } - return log.Criticale(sqlutils.QueryRowsMapBuffered(db, query, onRow, argsArray...)) + if err = sqlutils.QueryRowsMapBuffered(db, query, onRow, argsArray...); err != nil { + log.Warning(err.Error()) + } + return err } // ReadTimeNow reads and returns the current timestamp as string. 
This is an unfortunate workaround diff --git a/go/vt/orchestrator/db/tls.go b/go/vt/orchestrator/db/tls.go index ab31a77bc3f..45894da47fd 100644 --- a/go/vt/orchestrator/db/tls.go +++ b/go/vt/orchestrator/db/tls.go @@ -22,11 +22,12 @@ import ( "strings" "time" + "vitess.io/vitess/go/vt/log" + "github.com/go-sql-driver/mysql" "github.com/patrickmn/go-cache" "github.com/rcrowley/go-metrics" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" "vitess.io/vitess/go/vt/orchestrator/config" @@ -81,7 +82,7 @@ func requiresTLS(host string, port int, uri string) bool { required=values(required) ` if _, err := ExecOrchestrator(query, host, port, required); err != nil { - log.Errore(err) + log.Error(err) } writeInstanceTLSCounter.Inc(1) @@ -100,7 +101,8 @@ func SetupMySQLTopologyTLS(uri string) (string, error) { // Drop to TLS 1.0 for talking to MySQL tlsConfig.MinVersion = tls.VersionTLS10 if err != nil { - return "", log.Errorf("Can't create TLS configuration for Topology connection %s: %s", uri, err) + log.Errorf("Can't create TLS configuration for Topology connection %s: %s", uri, err) + return "", err } tlsConfig.InsecureSkipVerify = config.Config.MySQLTopologySSLSkipVerify @@ -108,11 +110,13 @@ func SetupMySQLTopologyTLS(uri string) (string, error) { config.Config.MySQLTopologySSLCertFile != "" && config.Config.MySQLTopologySSLPrivateKeyFile != "" { if err = ssl.AppendKeyPair(tlsConfig, config.Config.MySQLTopologySSLCertFile, config.Config.MySQLTopologySSLPrivateKeyFile); err != nil { - return "", log.Errorf("Can't setup TLS key pairs for %s: %s", uri, err) + log.Errorf("Can't setup TLS key pairs for %s: %s", uri, err) + return "", err } } if err = mysql.RegisterTLSConfig("topology", tlsConfig); err != nil { - return "", log.Errorf("Can't register mysql TLS config for topology: %s", err) + log.Errorf("Can't register mysql TLS config for topology: %s", err) + return "", err } topologyTLSConfigured = true } @@ -128,18 +132,21 @@ func SetupMySQLOrchestratorTLS(uri string) (string, error) { // Drop to TLS 1.0 for talking to MySQL tlsConfig.MinVersion = tls.VersionTLS10 if err != nil { - return "", log.Fatalf("Can't create TLS configuration for Orchestrator connection %s: %s", uri, err) + log.Fatalf("Can't create TLS configuration for Orchestrator connection %s: %s", uri, err) + return "", err } tlsConfig.InsecureSkipVerify = config.Config.MySQLOrchestratorSSLSkipVerify if (!config.Config.MySQLOrchestratorSSLSkipVerify) && config.Config.MySQLOrchestratorSSLCertFile != "" && config.Config.MySQLOrchestratorSSLPrivateKeyFile != "" { if err = ssl.AppendKeyPair(tlsConfig, config.Config.MySQLOrchestratorSSLCertFile, config.Config.MySQLOrchestratorSSLPrivateKeyFile); err != nil { - return "", log.Fatalf("Can't setup TLS key pairs for %s: %s", uri, err) + log.Fatalf("Can't setup TLS key pairs for %s: %s", uri, err) + return "", err } } if err = mysql.RegisterTLSConfig("orchestrator", tlsConfig); err != nil { - return "", log.Fatalf("Can't register mysql TLS config for orchestrator: %s", err) + log.Fatalf("Can't register mysql TLS config for orchestrator: %s", err) + return "", err } orchestratorTLSConfigured = true } diff --git a/go/vt/orchestrator/discovery/queue.go b/go/vt/orchestrator/discovery/queue.go index e43829213f9..ce6d475bd68 100644 --- a/go/vt/orchestrator/discovery/queue.go +++ b/go/vt/orchestrator/discovery/queue.go @@ -29,8 +29,8 @@ import ( "sync" "time" + "vitess.io/vitess/go/vt/log" 
"vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/inst" ) @@ -93,7 +93,7 @@ func CreateOrReturnQueue(name string) *Queue { // monitoring queue sizes until we are told to stop func (q *Queue) startMonitoring() { - log.Debugf("Queue.startMonitoring(%s)", q.name) + log.Infof("Queue.startMonitoring(%s)", q.name) ticker := time.NewTicker(time.Second) // hard-coded at every second for { diff --git a/go/vt/orchestrator/discovery/queue_aggregated_stats.go b/go/vt/orchestrator/discovery/queue_aggregated_stats.go index d9e103a652c..79f2e310a58 100644 --- a/go/vt/orchestrator/discovery/queue_aggregated_stats.go +++ b/go/vt/orchestrator/discovery/queue_aggregated_stats.go @@ -19,7 +19,7 @@ package discovery import ( "github.com/montanaflynn/stats" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "vitess.io/vitess/go/vt/log" ) // AggregatedQueueMetrics contains aggregate information some part queue metrics @@ -55,12 +55,12 @@ func (q *Queue) DiscoveryQueueMetrics(period int) []QueueMetric { // adjust period in case we ask for something that's too long if period > len(q.metrics) { - log.Debugf("DiscoveryQueueMetrics: wanted: %d, adjusting period to %d", period, len(q.metrics)) + log.Infof("DiscoveryQueueMetrics: wanted: %d, adjusting period to %d", period, len(q.metrics)) period = len(q.metrics) } a := q.metrics[len(q.metrics)-period:] - log.Debugf("DiscoveryQueueMetrics: returning values: %+v", a) + log.Infof("DiscoveryQueueMetrics: returning values: %+v", a) return a } @@ -90,6 +90,6 @@ func (q *Queue) AggregatedDiscoveryQueueMetrics(period int) *AggregatedQueueMetr QueuedP95Entries: percentile(intSliceToFloat64Slice(queuedEntries), 95), QueuedMaxEntries: max(intSliceToFloat64Slice(queuedEntries)), } - log.Debugf("AggregatedDiscoveryQueueMetrics: returning values: %+v", a) + log.Infof("AggregatedDiscoveryQueueMetrics: returning values: %+v", a) return a } diff --git a/go/vt/orchestrator/external/golib/log/log.go b/go/vt/orchestrator/external/golib/log/log.go deleted file mode 100644 index ddfbc0a850e..00000000000 --- a/go/vt/orchestrator/external/golib/log/log.go +++ /dev/null @@ -1,294 +0,0 @@ -/* - Copyright 2014 Outbrain Inc. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package log - -import ( - "errors" - "fmt" - "log/syslog" - "os" - "runtime" - "runtime/debug" - "strings" - "time" - - "vitess.io/vitess/go/vt/log" -) - -// LogLevel indicates the severity of a log entry -type LogLevel int - -func (this LogLevel) String() string { - switch this { - case FATAL: - return "FATAL" - case CRITICAL: - return "CRITICAL" - case ERROR: - return "ERROR" - case WARNING: - return "WARNING" - case NOTICE: - return "NOTICE" - case INFO: - return "INFO" - case DEBUG: - return "DEBUG" - } - return "unknown" -} - -func LogLevelFromString(logLevelName string) (LogLevel, error) { - switch logLevelName { - case "FATAL": - return FATAL, nil - case "CRITICAL": - return CRITICAL, nil - case "ERROR": - return ERROR, nil - case "WARNING": - return WARNING, nil - case "NOTICE": - return NOTICE, nil - case "INFO": - return INFO, nil - case "DEBUG": - return DEBUG, nil - } - return 0, fmt.Errorf("Unknown LogLevel name: %+v", logLevelName) -} - -const ( - FATAL LogLevel = iota - CRITICAL - ERROR - WARNING - NOTICE - INFO - DEBUG -) - -const TimeFormat = "2006-01-02 15:04:05" - -// globalLogLevel indicates the global level filter for all logs (only entries with level equals or higher -// than this value will be logged) -var globalLogLevel LogLevel = DEBUG -var printStackTrace bool = false - -// syslogWriter is optional, and defaults to nil (disabled) -var syslogLevel LogLevel = ERROR -var syslogWriter *syslog.Writer - -// SetPrintStackTrace enables/disables dumping the stack upon error logging -func SetPrintStackTrace(shouldPrintStackTrace bool) { - printStackTrace = shouldPrintStackTrace -} - -// SetLevel sets the global log level. Only entries with level equals or higher than -// this value will be logged -func SetLevel(logLevel LogLevel) { - globalLogLevel = logLevel -} - -// GetLevel returns current global log level -func GetLevel() LogLevel { - return globalLogLevel -} - -// EnableSyslogWriter enables, if possible, writes to syslog. These will execute _in addition_ to normal logging -func EnableSyslogWriter(tag string) (err error) { - syslogWriter, err = syslog.New(syslog.LOG_ERR, tag) - if err != nil { - syslogWriter = nil - } - return err -} - -// SetSyslogLevel sets the minimal syslog level. Only entries with level equals or higher than -// this value will be logged. However, this is also capped by the global log level. That is, -// messages with lower level than global-log-level will be discarded at any case. -func SetSyslogLevel(logLevel LogLevel) { - syslogLevel = logLevel -} - -// logFormattedEntry nicely formats and emits a log entry -func logFormattedEntry(logLevel LogLevel, message string, args ...any) string { - return logDepth(logLevel, 0, message, args...) -} - -// logFormattedEntry nicely formats and emits a log entry -func logDepth(logLevel LogLevel, depth int, message string, args ...any) string { - if logLevel > globalLogLevel { - return "" - } - // if TZ env variable is set, update the timestamp timezone - localizedTime := time.Now() - tzLocation := os.Getenv("TZ") - if tzLocation != "" { - location, err := time.LoadLocation(tzLocation) - if err == nil { // if invalid tz location was provided, just leave it as the default - localizedTime = time.Now().In(location) - } - } - - msgArgs := fmt.Sprintf(message, args...) 
- sourceFile, pos := callerPos(depth) - entryString := fmt.Sprintf("%s %8s %s:%d] %s", localizedTime.Format(TimeFormat), logLevel, sourceFile, pos, msgArgs) - fmt.Fprintln(os.Stderr, entryString) - - if syslogWriter != nil { - go func() error { - if logLevel > syslogLevel { - return nil - } - switch logLevel { - case FATAL: - return syslogWriter.Emerg(msgArgs) - case CRITICAL: - return syslogWriter.Crit(msgArgs) - case ERROR: - return syslogWriter.Err(msgArgs) - case WARNING: - return syslogWriter.Warning(msgArgs) - case NOTICE: - return syslogWriter.Notice(msgArgs) - case INFO: - return syslogWriter.Info(msgArgs) - case DEBUG: - return syslogWriter.Debug(msgArgs) - } - return nil - }() - } - return entryString -} - -func callerPos(depth int) (string, int) { - _, file, line, ok := runtime.Caller(4 + depth) - if !ok { - file = "???" - line = 1 - } else { - slash := strings.LastIndex(file, "/") - if slash >= 0 { - file = file[slash+1:] - } - } - return file, line -} - -// logEntry emits a formatted log entry -func logEntry(logLevel LogLevel, message string, args ...any) string { - entryString := message - for _, s := range args { - entryString += fmt.Sprintf(" %s", s) - } - return logDepth(logLevel, 1, entryString) -} - -// logErrorEntry emits a log entry based on given error object -func logErrorEntry(logLevel LogLevel, err error) error { - if err == nil { - // No error - return nil - } - entryString := fmt.Sprintf("%+v", err) - logEntry(logLevel, entryString) - if printStackTrace { - debug.PrintStack() - } - return err -} - -func Debug(message string, args ...any) string { - return logEntry(DEBUG, message, args...) -} - -func Debugf(message string, args ...any) string { - return logFormattedEntry(DEBUG, message, args...) -} - -func Info(message string, args ...any) string { - log.Infof(message, args...) - return fmt.Sprintf(message, args...) -} - -func Infof(message string, args ...any) string { - return logFormattedEntry(INFO, message, args...) -} - -func Notice(message string, args ...any) string { - return logEntry(NOTICE, message, args...) -} - -func Noticef(message string, args ...any) string { - return logFormattedEntry(NOTICE, message, args...) -} - -func Warning(message string, args ...any) error { - return errors.New(logEntry(WARNING, message, args...)) -} - -func Warningf(message string, args ...any) error { - return errors.New(logFormattedEntry(WARNING, message, args...)) -} - -func Error(message string, args ...any) error { - return errors.New(logEntry(ERROR, message, args...)) -} - -func Errorf(message string, args ...any) error { - log.Infof(message, args...) - return fmt.Errorf(message, args...) -} - -func Errore(err error) error { - return logErrorEntry(ERROR, err) -} - -func Critical(message string, args ...any) error { - return errors.New(logEntry(CRITICAL, message, args...)) -} - -func Criticalf(message string, args ...any) error { - return errors.New(logFormattedEntry(CRITICAL, message, args...)) -} - -func Criticale(err error) error { - return logErrorEntry(CRITICAL, err) -} - -// Fatal emits a FATAL level entry and exists the program -func Fatal(message string, args ...any) error { - logEntry(FATAL, message, args...) - os.Exit(1) - return errors.New(logEntry(CRITICAL, message, args...)) -} - -// Fatalf emits a FATAL level entry and exists the program -func Fatalf(message string, args ...any) error { - logFormattedEntry(FATAL, message, args...) 
- os.Exit(1) - return errors.New(logFormattedEntry(CRITICAL, message, args...)) -} - -// Fatale emits a FATAL level entry and exists the program -func Fatale(err error) error { - logErrorEntry(FATAL, err) - os.Exit(1) - return err -} diff --git a/go/vt/orchestrator/external/golib/sqlutils/sqlutils.go b/go/vt/orchestrator/external/golib/sqlutils/sqlutils.go index dc9709c06b6..c8885fc3ae6 100644 --- a/go/vt/orchestrator/external/golib/sqlutils/sqlutils.go +++ b/go/vt/orchestrator/external/golib/sqlutils/sqlutils.go @@ -25,7 +25,7 @@ import ( "sync" "time" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "vitess.io/vitess/go/vt/log" ) const DateTimeFormat = "2006-01-02 15:04:05.999999" @@ -266,7 +266,8 @@ func QueryRowsMap(db *sql.DB, query string, on_row func(RowMap) error, args ...a defer rows.Close() } if err != nil && err != sql.ErrNoRows { - return log.Errore(err) + log.Error(err) + return err } err = ScanRowsToMaps(rows, on_row) return @@ -283,7 +284,8 @@ func queryResultData(db *sql.DB, query string, retrieveColumns bool, args ...any var rows *sql.Rows rows, err = db.Query(query, args...) if err != nil && err != sql.ErrNoRows { - return EmptyResultData, columns, log.Errore(err) + log.Error(err) + return EmptyResultData, columns, err } defer rows.Close() @@ -339,7 +341,7 @@ func ExecNoPrepare(db *sql.DB, query string, args ...any) (res sql.Result, err e res, err = db.Exec(query, args...) if err != nil { - log.Errore(err) + log.Error(err) } return res, err } @@ -360,7 +362,7 @@ func execInternal(silent bool, db *sql.DB, query string, args ...any) (res sql.R defer stmt.Close() res, err = stmt.Exec(args...) if err != nil && !silent { - log.Errore(err) + log.Error(err) } return res, err } diff --git a/go/vt/orchestrator/external/zk/zk.go b/go/vt/orchestrator/external/zk/zk.go index 8007e842117..4f83464206c 100644 --- a/go/vt/orchestrator/external/zk/zk.go +++ b/go/vt/orchestrator/external/zk/zk.go @@ -28,9 +28,9 @@ import ( "strings" "time" - "github.com/samuel/go-zookeeper/zk" + "vitess.io/vitess/go/vt/log" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "github.com/samuel/go-zookeeper/zk" ) type ZooKeeper struct { @@ -59,12 +59,12 @@ func (zook *ZooKeeper) SetServers(serversArray []string) { } func (zook *ZooKeeper) SetAuth(scheme string, auth []byte) { - log.Debug("Setting Auth ") + log.Info("Setting Auth ") zook.authScheme = scheme zook.authExpression = auth } -// Returns acls +// BuildACL returns Acls func (zook *ZooKeeper) BuildACL(authScheme string, user string, pwd string, acls string) (perms []zk.ACL, err error) { aclsList := strings.Split(acls, ",") for _, elem := range aclsList { @@ -80,7 +80,7 @@ func (zook *ZooKeeper) BuildACL(authScheme string, user string, pwd string, acls type infoLogger struct{} -func (_ infoLogger) Printf(format string, a ...any) { +func (infoLogger) Printf(format string, a ...any) { log.Infof(format, a...) 
} @@ -89,7 +89,7 @@ func (zook *ZooKeeper) connect() (*zk.Conn, error) { zk.DefaultLogger = &infoLogger{} conn, _, err := zk.Connect(zook.servers, time.Second) if err == nil && zook.authScheme != "" { - log.Debugf("Add Auth %s %s", zook.authScheme, zook.authExpression) + log.Infof("Add Auth %s %s", zook.authScheme, zook.authExpression) err = conn.AddAuth(zook.authScheme, zook.authExpression) } @@ -180,7 +180,7 @@ func (zook *ZooKeeper) childrenRecursiveInternal(connection *zk.Conn, path strin for _, child := range children { incrementalChild := gopath.Join(incrementalPath, child) recursiveChildren = append(recursiveChildren, incrementalChild) - log.Debugf("incremental child: %+v", incrementalChild) + log.Infof("incremental child: %+v", incrementalChild) incrementalChildren, err := zook.childrenRecursiveInternal(connection, gopath.Join(path, child), incrementalChild) if err != nil { return children, err @@ -210,12 +210,12 @@ func (zook *ZooKeeper) createInternal(connection *zk.Conn, path string, data []b return "/", nil } - log.Debugf("creating: %s", path) + log.Infof("creating: %s", path) attempts := 0 for { - attempts += 1 + attempts++ returnValue, err := connection.Create(path, data, zook.flags, zook.acl) - log.Debugf("create status for %s: %s, %+v", path, returnValue, err) + log.Infof("create status for %s: %s, %+v", path, returnValue, err) if err != nil && force && attempts < 2 { parentPath := gopath.Dir(path) @@ -234,12 +234,12 @@ func (zook *ZooKeeper) createInternalWithACL(connection *zk.Conn, path string, d if path == "/" { return "/", nil } - log.Debugf("creating: %s with acl ", path) + log.Infof("creating: %s with acl ", path) attempts := 0 for { - attempts += 1 + attempts++ returnValue, err := connection.Create(path, data, zook.flags, perms) - log.Debugf("create status for %s: %s, %+v", path, returnValue, err) + log.Infof("create status for %s: %s, %+v", path, returnValue, err) if err != nil && force && attempts < 2 { _, _ = zook.createInternalWithACL(connection, gopath.Dir(path), []byte("zookeepercli auto-generated"), force, perms) } else { @@ -384,13 +384,13 @@ func (zook *ZooKeeper) Delete(path string) error { func (zook *ZooKeeper) DeleteRecursive(path string) error { result, err := zook.ChildrenRecursive(path) if err != nil { - log.Fatale(err) + log.Fatal(err) } for i := len(result) - 1; i >= 0; i-- { znode := path + "/" + result[i] if err = zook.Delete(znode); err != nil { - log.Fatale(err) + log.Fatal(err) } } diff --git a/go/vt/orchestrator/http/api.go b/go/vt/orchestrator/http/api.go index 9d31f7c3f37..66f9e704745 100644 --- a/go/vt/orchestrator/http/api.go +++ b/go/vt/orchestrator/http/api.go @@ -29,7 +29,7 @@ import ( "github.com/martini-contrib/auth" "github.com/martini-contrib/render" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/util" "vitess.io/vitess/go/vt/orchestrator/collection" @@ -1950,7 +1950,7 @@ func (httpAPI *API) DiscoveryMetricsRaw(params martini.Params, r render.Render, Respond(r, &APIResponse{Code: ERROR, Message: "Unable to determine start time. 
Perhaps seconds value is wrong?"}) return } - log.Debugf("DiscoveryMetricsRaw data: retrieved %d entries from discovery.MC", len(json)) + log.Infof("DiscoveryMetricsRaw data: retrieved %d entries from discovery.MC", len(json)) r.JSON(http.StatusOK, json) } @@ -1974,7 +1974,7 @@ func (httpAPI *API) DiscoveryMetricsAggregated(params martini.Params, r render.R // queued values), data taken secondly for the last N seconds. func (httpAPI *API) DiscoveryQueueMetricsRaw(params martini.Params, r render.Render, req *http.Request, user auth.User) { seconds, err := strconv.Atoi(params["seconds"]) - log.Debugf("DiscoveryQueueMetricsRaw: seconds: %d", seconds) + log.Infof("DiscoveryQueueMetricsRaw: seconds: %d", seconds) if err != nil { Respond(r, &APIResponse{Code: ERROR, Message: "Unable to generate discovery queue aggregated metrics"}) return @@ -1982,7 +1982,7 @@ func (httpAPI *API) DiscoveryQueueMetricsRaw(params martini.Params, r render.Ren queue := discovery.CreateOrReturnQueue("DEFAULT") metrics := queue.DiscoveryQueueMetrics(seconds) - log.Debugf("DiscoveryQueueMetricsRaw data: %+v", metrics) + log.Infof("DiscoveryQueueMetricsRaw data: %+v", metrics) r.JSON(http.StatusOK, metrics) } @@ -1992,7 +1992,7 @@ func (httpAPI *API) DiscoveryQueueMetricsRaw(params martini.Params, r render.Ren // See go/discovery/ for more information. func (httpAPI *API) DiscoveryQueueMetricsAggregated(params martini.Params, r render.Render, req *http.Request, user auth.User) { seconds, err := strconv.Atoi(params["seconds"]) - log.Debugf("DiscoveryQueueMetricsAggregated: seconds: %d", seconds) + log.Infof("DiscoveryQueueMetricsAggregated: seconds: %d", seconds) if err != nil { Respond(r, &APIResponse{Code: ERROR, Message: "Unable to generate discovery queue aggregated metrics"}) return @@ -2000,7 +2000,7 @@ func (httpAPI *API) DiscoveryQueueMetricsAggregated(params martini.Params, r ren queue := discovery.CreateOrReturnQueue("DEFAULT") aggregated := queue.AggregatedDiscoveryQueueMetrics(seconds) - log.Debugf("DiscoveryQueueMetricsAggregated data: %+v", aggregated) + log.Infof("DiscoveryQueueMetricsAggregated data: %+v", aggregated) r.JSON(http.StatusOK, aggregated) } @@ -2008,7 +2008,7 @@ func (httpAPI *API) DiscoveryQueueMetricsAggregated(params martini.Params, r ren // BackendQueryMetricsRaw returns the raw backend query metrics func (httpAPI *API) BackendQueryMetricsRaw(params martini.Params, r render.Render, req *http.Request, user auth.User) { seconds, err := strconv.Atoi(params["seconds"]) - log.Debugf("BackendQueryMetricsRaw: seconds: %d", seconds) + log.Infof("BackendQueryMetricsRaw: seconds: %d", seconds) if err != nil { Respond(r, &APIResponse{Code: ERROR, Message: "Unable to generate raw backend query metrics"}) return @@ -2021,14 +2021,14 @@ func (httpAPI *API) BackendQueryMetricsRaw(params martini.Params, r render.Rende return } - log.Debugf("BackendQueryMetricsRaw data: %+v", m) + log.Infof("BackendQueryMetricsRaw data: %+v", m) r.JSON(http.StatusOK, m) } func (httpAPI *API) BackendQueryMetricsAggregated(params martini.Params, r render.Render, req *http.Request, user auth.User) { seconds, err := strconv.Atoi(params["seconds"]) - log.Debugf("BackendQueryMetricsAggregated: seconds: %d", seconds) + log.Infof("BackendQueryMetricsAggregated: seconds: %d", seconds) if err != nil { Respond(r, &APIResponse{Code: ERROR, Message: "Unable to aggregated generate backend query metrics"}) return @@ -2036,7 +2036,7 @@ func (httpAPI *API) BackendQueryMetricsAggregated(params martini.Params, r rende refTime := 
time.Now().Add(-time.Duration(seconds) * time.Second) aggregated := query.AggregatedSince(queryMetrics, refTime) - log.Debugf("BackendQueryMetricsAggregated data: %+v", aggregated) + log.Infof("BackendQueryMetricsAggregated data: %+v", aggregated) r.JSON(http.StatusOK, aggregated) } @@ -2044,7 +2044,7 @@ func (httpAPI *API) BackendQueryMetricsAggregated(params martini.Params, r rende // WriteBufferMetricsRaw returns the raw instance write buffer metrics func (httpAPI *API) WriteBufferMetricsRaw(params martini.Params, r render.Render, req *http.Request, user auth.User) { seconds, err := strconv.Atoi(params["seconds"]) - log.Debugf("WriteBufferMetricsRaw: seconds: %d", seconds) + log.Infof("WriteBufferMetricsRaw: seconds: %d", seconds) if err != nil { Respond(r, &APIResponse{Code: ERROR, Message: "Unable to generate raw instance write buffer metrics"}) return @@ -2057,7 +2057,7 @@ func (httpAPI *API) WriteBufferMetricsRaw(params martini.Params, r render.Render return } - log.Debugf("WriteBufferMetricsRaw data: %+v", m) + log.Infof("WriteBufferMetricsRaw data: %+v", m) r.JSON(http.StatusOK, m) } @@ -2065,7 +2065,7 @@ func (httpAPI *API) WriteBufferMetricsRaw(params martini.Params, r render.Render // WriteBufferMetricsAggregated provides aggregate metrics of instance write buffer metrics func (httpAPI *API) WriteBufferMetricsAggregated(params martini.Params, r render.Render, req *http.Request, user auth.User) { seconds, err := strconv.Atoi(params["seconds"]) - log.Debugf("WriteBufferMetricsAggregated: seconds: %d", seconds) + log.Infof("WriteBufferMetricsAggregated: seconds: %d", seconds) if err != nil { Respond(r, &APIResponse{Code: ERROR, Message: "Unable to aggregated instance write buffer metrics"}) return @@ -2073,7 +2073,7 @@ func (httpAPI *API) WriteBufferMetricsAggregated(params martini.Params, r render refTime := time.Now().Add(-time.Duration(seconds) * time.Second) aggregated := inst.AggregatedSince(writeBufferMetrics, refTime) - log.Debugf("WriteBufferMetricsAggregated data: %+v", aggregated) + log.Infof("WriteBufferMetricsAggregated data: %+v", aggregated) r.JSON(http.StatusOK, aggregated) } diff --git a/go/vt/orchestrator/http/api_test.go b/go/vt/orchestrator/http/api_test.go index aff39b3cca0..f9bac482c59 100644 --- a/go/vt/orchestrator/http/api_test.go +++ b/go/vt/orchestrator/http/api_test.go @@ -7,14 +7,12 @@ import ( "github.com/go-martini/martini" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests" ) func init() { config.Config.HostnameResolveMethod = "none" config.MarkConfigurationLoaded() - log.SetLevel(log.ERROR) } func TestKnownPaths(t *testing.T) { diff --git a/go/vt/orchestrator/inst/analysis_dao.go b/go/vt/orchestrator/inst/analysis_dao.go index 513c891a86b..97363a79fe1 100644 --- a/go/vt/orchestrator/inst/analysis_dao.go +++ b/go/vt/orchestrator/inst/analysis_dao.go @@ -21,6 +21,8 @@ import ( "regexp" "time" + "vitess.io/vitess/go/vt/log" + "google.golang.org/protobuf/encoding/prototext" "vitess.io/vitess/go/vt/orchestrator/config" @@ -34,7 +36,6 @@ import ( "github.com/patrickmn/go-cache" "github.com/rcrowley/go-metrics" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) @@ -44,8 +45,8 @@ var analysisChangeWriteCounter = metrics.NewCounter() var recentInstantAnalysis *cache.Cache func init() { - metrics.Register("analysis.change.write.attempt", analysisChangeWriteAttemptCounter) - 
metrics.Register("analysis.change.write", analysisChangeWriteCounter) + _ = metrics.Register("analysis.change.write.attempt", analysisChangeWriteAttemptCounter) + _ = metrics.Register("analysis.change.write", analysisChangeWriteCounter) go initializeAnalysisDaoPostConfiguration() } @@ -436,7 +437,7 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) a.ClusterDetails.ReadRecoveryInfo() a.Replicas = *NewInstanceKeyMap() - a.Replicas.ReadCommaDelimitedList(m.GetString("replica_hosts")) + _ = a.Replicas.ReadCommaDelimitedList(m.GetString("replica_hosts")) countValidOracleGTIDReplicas := m.GetUint("count_valid_oracle_gtid_replicas") a.OracleGTIDImmediateTopology = countValidOracleGTIDReplicas == a.CountValidReplicas && a.CountValidReplicas > 0 @@ -472,7 +473,7 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) a.ClusterDetails.ClusterName, a.IsPrimary, a.LastCheckValid, a.LastCheckPartialSuccess, a.CountReplicas, a.CountValidReplicas, a.CountValidReplicatingReplicas, a.CountLaggingReplicas, a.CountDelayedReplicas, a.CountReplicasFailingToConnectToPrimary, ) if util.ClearToLog("analysis_dao", analysisMessage) { - log.Debugf(analysisMessage) + log.Infof(analysisMessage) } } if clusters[a.SuggestedClusterAlias] == nil { @@ -694,10 +695,10 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) }) if err != nil { - return result, log.Errore(err) + log.Error(err) } // TODO: result, err = getConcensusReplicationAnalysis(result) - return result, log.Errore(err) + return result, err } // auditInstanceAnalysisInChangelog will write down an instance's analysis in the database_instance_analysis_changelog table. @@ -731,11 +732,13 @@ func auditInstanceAnalysisInChangelog(instanceKey *InstanceKey, analysisCode Ana string(analysisCode), instanceKey.Hostname, instanceKey.Port, string(analysisCode), ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } rows, err := sqlResult.RowsAffected() if err != nil { - return log.Errore(err) + log.Error(err) + return err } lastAnalysisChanged = (rows > 0) } @@ -750,7 +753,8 @@ func auditInstanceAnalysisInChangelog(instanceKey *InstanceKey, analysisCode Ana instanceKey.Hostname, instanceKey.Port, string(analysisCode), ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } } recentInstantAnalysis.Set(instanceKey.DisplayString(), analysisCode, cache.DefaultExpiration) @@ -770,7 +774,8 @@ func auditInstanceAnalysisInChangelog(instanceKey *InstanceKey, analysisCode Ana if err == nil { analysisChangeWriteCounter.Inc(1) } - return log.Errore(err) + log.Error(err) + return err } // ExpireInstanceAnalysisChangelog removes old-enough analysis entries from the changelog @@ -783,7 +788,8 @@ func ExpireInstanceAnalysisChangelog() error { `, config.Config.UnseenInstanceForgetHours, ) - return log.Errore(err) + log.Error(err) + return err } // ReadReplicationAnalysisChangelog @@ -814,33 +820,7 @@ func ReadReplicationAnalysisChangelog() (res [](*ReplicationAnalysisChangelog), }) if err != nil { - log.Errore(err) + log.Error(err) } return res, err } - -// ReadPeerAnalysisMap reads raft-peer failure analysis, and returns a PeerAnalysisMap, -// indicating how many peers see which analysis -func ReadPeerAnalysisMap() (peerAnalysisMap PeerAnalysisMap, err error) { - peerAnalysisMap = make(PeerAnalysisMap) - query := ` - select - hostname, - port, - analysis - from - database_instance_peer_analysis - order by - peer, hostname, port - ` - err = 
db.QueryOrchestratorRowsMap(query, func(m sqlutils.RowMap) error { - instanceKey := InstanceKey{Hostname: m.GetString("hostname"), Port: m.GetInt("port")} - analysis := m.GetString("analysis") - instanceAnalysis := NewInstanceAnalysis(&instanceKey, AnalysisCode(analysis)) - mapKey := instanceAnalysis.String() - peerAnalysisMap[mapKey] = peerAnalysisMap[mapKey] + 1 - - return nil - }) - return peerAnalysisMap, log.Errore(err) -} diff --git a/go/vt/orchestrator/inst/analysis_test.go b/go/vt/orchestrator/inst/analysis_test.go index 203d93ede31..269aeda99e4 100644 --- a/go/vt/orchestrator/inst/analysis_test.go +++ b/go/vt/orchestrator/inst/analysis_test.go @@ -20,14 +20,12 @@ import ( "testing" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests" ) func init() { config.Config.HostnameResolveMethod = "none" config.MarkConfigurationLoaded() - log.SetLevel(log.ERROR) } func TestGetAnalysisInstanceType(t *testing.T) { diff --git a/go/vt/orchestrator/inst/audit_dao.go b/go/vt/orchestrator/inst/audit_dao.go index 4e377fbd462..43e96e84b90 100644 --- a/go/vt/orchestrator/inst/audit_dao.go +++ b/go/vt/orchestrator/inst/audit_dao.go @@ -22,11 +22,12 @@ import ( "os" "time" + "vitess.io/vitess/go/vt/log" + "github.com/rcrowley/go-metrics" "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) @@ -64,13 +65,15 @@ func AuditOperation(auditType string, instanceKey *InstanceKey, message string) go func() error { f, err := os.OpenFile(config.Config.AuditLogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0640) if err != nil { - return log.Errore(err) + log.Error(err) + return err } defer f.Close() - text := fmt.Sprintf("%s\t%s\t%s\t%d\t[%s]\t%s\t\n", time.Now().Format(log.TimeFormat), auditType, instanceKey.Hostname, instanceKey.Port, clusterName, message) + text := fmt.Sprintf("%s\t%s\t%s\t%d\t[%s]\t%s\t\n", time.Now().Format("2006-01-02 15:04:05"), auditType, instanceKey.Hostname, instanceKey.Port, clusterName, message) if _, err = f.WriteString(text); err != nil { - return log.Errore(err) + log.Error(err) + return err } return nil }() @@ -91,7 +94,8 @@ func AuditOperation(auditType string, instanceKey *InstanceKey, message string) message, ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } } logMessage := fmt.Sprintf("auditType:%s instance:%s cluster:%s message:%s", auditType, instanceKey.DisplayString(), clusterName, message) @@ -149,7 +153,7 @@ func ReadRecentAudit(instanceKey *InstanceKey, page int) ([]Audit, error) { }) if err != nil { - log.Errore(err) + log.Error(err) } return res, err diff --git a/go/vt/orchestrator/inst/binlog_test.go b/go/vt/orchestrator/inst/binlog_test.go index 0e3d829aed4..713434999c9 100644 --- a/go/vt/orchestrator/inst/binlog_test.go +++ b/go/vt/orchestrator/inst/binlog_test.go @@ -4,7 +4,6 @@ import ( "testing" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests" ) @@ -13,7 +12,6 @@ var testCoordinates = BinlogCoordinates{LogFile: "mysql-bin.000010", LogPos: 108 func init() { config.Config.HostnameResolveMethod = "none" config.MarkConfigurationLoaded() - log.SetLevel(log.ERROR) } func TestDetach(t *testing.T) { diff --git a/go/vt/orchestrator/inst/candidate_database_instance_dao.go 
b/go/vt/orchestrator/inst/candidate_database_instance_dao.go index 2479cb56a2b..17153214228 100644 --- a/go/vt/orchestrator/inst/candidate_database_instance_dao.go +++ b/go/vt/orchestrator/inst/candidate_database_instance_dao.go @@ -17,7 +17,7 @@ package inst import ( - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" "vitess.io/vitess/go/vt/orchestrator/config" @@ -46,7 +46,8 @@ func RegisterCandidateInstance(candidate *CandidateDatabaseInstance) error { ` writeFunc := func() error { _, err := db.ExecOrchestrator(query, args...) - return log.Errore(err) + log.Error(err) + return err } return ExecDBWriteFunc(writeFunc) } @@ -59,7 +60,8 @@ func ExpireCandidateInstances() error { where last_suggested < NOW() - INTERVAL ? MINUTE `, config.Config.CandidateInstanceExpireMinutes, ) - return log.Errore(err) + log.Error(err) + return err } return ExecDBWriteFunc(writeFunc) } diff --git a/go/vt/orchestrator/inst/cluster_alias_dao.go b/go/vt/orchestrator/inst/cluster_alias_dao.go index fd58c52032f..a20f7900cf8 100644 --- a/go/vt/orchestrator/inst/cluster_alias_dao.go +++ b/go/vt/orchestrator/inst/cluster_alias_dao.go @@ -19,8 +19,9 @@ package inst import ( "fmt" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) @@ -89,7 +90,8 @@ func writeClusterAlias(clusterName string, alias string) error { (?, ?, now()) `, clusterName, alias) - return log.Errore(err) + log.Error(err) + return err } return ExecDBWriteFunc(writeFunc) } @@ -104,7 +106,8 @@ func writeClusterAliasManualOverride(clusterName string, alias string) error { (?, ?) `, clusterName, alias) - return log.Errore(err) + log.Error(err) + return err } return ExecDBWriteFunc(writeFunc) } @@ -136,7 +139,8 @@ func UpdateClusterAliases() error { read_only desc, num_replica_hosts asc `, DowntimeLostInRecoveryMessage) - return log.Errore(err) + log.Error(err) + return err } if err := ExecDBWriteFunc(writeFunc); err != nil { return err @@ -155,7 +159,8 @@ func UpdateClusterAliases() error { having sum(suggested_cluster_alias = '') = count(*) `) - return log.Errore(err) + log.Error(err) + return err } if err := ExecDBWriteFunc(writeFunc); err != nil { return err @@ -174,7 +179,8 @@ func ReplaceAliasClusterName(oldClusterName string, newClusterName string) (err where cluster_name = ? `, newClusterName, oldClusterName) - return log.Errore(err) + log.Error(err) + return err } err = ExecDBWriteFunc(writeFunc) } @@ -186,7 +192,8 @@ func ReplaceAliasClusterName(oldClusterName string, newClusterName string) (err where cluster_name = ? 
`, newClusterName, oldClusterName) - return log.Errore(err) + log.Error(err) + return err } if ferr := ExecDBWriteFunc(writeFunc); ferr != nil { err = ferr diff --git a/go/vt/orchestrator/inst/cluster_domain_dao.go b/go/vt/orchestrator/inst/cluster_domain_dao.go index 74f8c9c79e8..ad8f9d6e8f8 100644 --- a/go/vt/orchestrator/inst/cluster_domain_dao.go +++ b/go/vt/orchestrator/inst/cluster_domain_dao.go @@ -17,9 +17,9 @@ package inst import ( + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" ) // WriteClusterDomainName will write (and override) the domain name of a cluster @@ -35,7 +35,8 @@ func WriteClusterDomainName(clusterName string, domainName string) error { last_registered=values(last_registered) `, clusterName, domainName) - return log.Errore(err) + log.Error(err) + return err } return ExecDBWriteFunc(writeFunc) } @@ -48,7 +49,8 @@ func ExpireClusterDomainName() error { where last_registered < NOW() - INTERVAL ? MINUTE `, config.Config.ExpiryHostnameResolvesMinutes, ) - return log.Errore(err) + log.Error(err) + return err } return ExecDBWriteFunc(writeFunc) } diff --git a/go/vt/orchestrator/inst/downtime_dao.go b/go/vt/orchestrator/inst/downtime_dao.go index ecc68977d22..8f41da56315 100644 --- a/go/vt/orchestrator/inst/downtime_dao.go +++ b/go/vt/orchestrator/inst/downtime_dao.go @@ -20,10 +20,10 @@ import ( "fmt" "time" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" - "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) // BeginDowntime will make mark an instance as downtimed (or override existing downtime period) @@ -81,9 +81,10 @@ func BeginDowntime(downtime *Downtime) (err error) { ) } if err != nil { - return log.Errore(err) + log.Error(err) + return err } - AuditOperation("begin-downtime", downtime.Key, fmt.Sprintf("owner: %s, reason: %s", downtime.Owner, downtime.Reason)) + _ = AuditOperation("begin-downtime", downtime.Key, fmt.Sprintf("owner: %s, reason: %s", downtime.Owner, downtime.Reason)) return nil } @@ -101,12 +102,13 @@ func EndDowntime(instanceKey *InstanceKey) (wasDowntimed bool, err error) { instanceKey.Port, ) if err != nil { - return wasDowntimed, log.Errore(err) + log.Error(err) + return wasDowntimed, err } if affected, _ := res.RowsAffected(); affected > 0 { wasDowntimed = true - AuditOperation("end-downtime", instanceKey, "") + _ = AuditOperation("end-downtime", instanceKey, "") } return wasDowntimed, err } @@ -180,10 +182,12 @@ func expireLostInRecoveryDowntime() error { // ExpireDowntime will remove the maintenance flag on old downtimes func ExpireDowntime() error { if err := renewLostInRecoveryDowntime(); err != nil { - return log.Errore(err) + log.Error(err) + return err } if err := expireLostInRecoveryDowntime(); err != nil { - return log.Errore(err) + log.Error(err) + return err } { res, err := db.ExecOrchestrator(` @@ -194,47 +198,13 @@ func ExpireDowntime() error { `, ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } if rowsAffected, _ := res.RowsAffected(); rowsAffected > 0 { - AuditOperation("expire-downtime", nil, fmt.Sprintf("Expired %d entries", rowsAffected)) + _ = AuditOperation("expire-downtime", nil, fmt.Sprintf("Expired %d entries", rowsAffected)) } } return nil } - -func ReadDowntime() (result []Downtime, err error) { - query := ` - select - hostname, - 
port, - begin_timestamp, - end_timestamp, - owner, - reason - from - database_instance_downtime - where - end_timestamp > now() - ` - err = db.QueryOrchestratorRowsMap(query, func(m sqlutils.RowMap) error { - downtime := Downtime{ - Key: &InstanceKey{}, - } - downtime.Key.Hostname = m.GetString("hostname") - downtime.Key.Port = m.GetInt("port") - downtime.BeginsAt = m.GetTime("begin_timestamp") - downtime.EndsAt = m.GetTime("end_timestamp") - downtime.BeginsAtString = m.GetString("begin_timestamp") - downtime.EndsAtString = m.GetString("end_timestamp") - downtime.Owner = m.GetString("owner") - downtime.Reason = m.GetString("reason") - - downtime.Duration = downtime.EndsAt.Sub(downtime.BeginsAt) - - result = append(result, downtime) - return nil - }) - return result, log.Errore(err) -} diff --git a/go/vt/orchestrator/inst/instance_binlog.go b/go/vt/orchestrator/inst/instance_binlog.go index adf0d4041fe..9ccf13a207f 100644 --- a/go/vt/orchestrator/inst/instance_binlog.go +++ b/go/vt/orchestrator/inst/instance_binlog.go @@ -20,7 +20,7 @@ import ( "errors" "regexp" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "vitess.io/vitess/go/vt/log" ) // Event entries may contains table IDs (can be different for same tables on different servers) @@ -73,31 +73,13 @@ type BinlogEventCursor struct { nextCoordinates BinlogCoordinates } -// fetchNextEventsFunc expected to return events starting at a given position, and automatically fetch those from next -// binary log when no more rows are found in current log. -// It is expected to return empty array with no error upon end of binlogs -// It is expected to return error upon error... -func NewBinlogEventCursor(startCoordinates BinlogCoordinates, fetchNextEventsFunc func(BinlogCoordinates) ([]BinlogEvent, error)) BinlogEventCursor { - events, _ := fetchNextEventsFunc(startCoordinates) - var initialNextCoordinates BinlogCoordinates - if len(events) > 0 { - initialNextCoordinates = events[0].NextBinlogCoordinates() - } - return BinlogEventCursor{ - cachedEvents: events, - currentEventIndex: -1, - fetchNextEvents: fetchNextEventsFunc, - nextCoordinates: initialNextCoordinates, - } -} - // nextEvent will return the next event entry from binary logs; it will automatically skip to next // binary log if need be. // Internally, it uses the cachedEvents array, so that it does not go to the MySQL server upon each call. // Returns nil upon reaching end of binary logs. func (binlogEventCursor *BinlogEventCursor) nextEvent(numEmptyEventsEvents int) (*BinlogEvent, error) { if numEmptyEventsEvents > maxEmptyEventsEvents { - log.Debugf("End of logs. currentEventIndex: %d, nextCoordinates: %+v", binlogEventCursor.currentEventIndex, binlogEventCursor.nextCoordinates) + log.Infof("End of logs. 
currentEventIndex: %d, nextCoordinates: %+v", binlogEventCursor.currentEventIndex, binlogEventCursor.nextCoordinates) // End of logs return nil, nil } @@ -107,7 +89,7 @@ func (binlogEventCursor *BinlogEventCursor) nextEvent(numEmptyEventsEvents int) if err != nil { return nil, err } - log.Debugf("zero cached events, next file: %+v", nextFileCoordinates) + log.Infof("zero cached events, next file: %+v", nextFileCoordinates) binlogEventCursor.cachedEvents, err = binlogEventCursor.fetchNextEvents(nextFileCoordinates) if err != nil { return nil, err diff --git a/go/vt/orchestrator/inst/instance_binlog_dao.go b/go/vt/orchestrator/inst/instance_binlog_dao.go index 4603b16b408..de5a46faa0f 100644 --- a/go/vt/orchestrator/inst/instance_binlog_dao.go +++ b/go/vt/orchestrator/inst/instance_binlog_dao.go @@ -19,14 +19,17 @@ package inst import ( "fmt" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) func GetPreviousGTIDs(instanceKey *InstanceKey, binlog string) (previousGTIDs *OracleGtidSet, err error) { if binlog == "" { - return nil, log.Errorf("GetPreviousGTIDs: empty binlog file name for %+v", *instanceKey) + errMsg := fmt.Sprintf("GetPreviousGTIDs: empty binlog file name for %+v", *instanceKey) + log.Errorf(errMsg) + return nil, fmt.Errorf(errMsg) } db, err := db.OpenTopology(instanceKey.Hostname, instanceKey.Port) if err != nil { diff --git a/go/vt/orchestrator/inst/instance_dao.go b/go/vt/orchestrator/inst/instance_dao.go index 5da0d7ba2fe..436ae2c82ef 100644 --- a/go/vt/orchestrator/inst/instance_dao.go +++ b/go/vt/orchestrator/inst/instance_dao.go @@ -29,6 +29,8 @@ import ( "sync" "time" + "vitess.io/vitess/go/vt/log" + vitessmysql "vitess.io/vitess/go/mysql" replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata" @@ -39,7 +41,6 @@ import ( "github.com/sjmudd/stopwatch" "vitess.io/vitess/go/tb" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/math" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" @@ -117,11 +118,11 @@ var writeBufferLatency = stopwatch.NewNamedStopwatch() var emptyQuotesRegexp = regexp.MustCompile(`^""$`) func init() { - metrics.Register("instance.access_denied", accessDeniedCounter) - metrics.Register("instance.read_topology", readTopologyInstanceCounter) - metrics.Register("instance.read", readInstanceCounter) - metrics.Register("instance.write", writeInstanceCounter) - writeBufferLatency.AddMany([]string{"wait", "write"}) + _ = metrics.Register("instance.access_denied", accessDeniedCounter) + _ = metrics.Register("instance.read_topology", readTopologyInstanceCounter) + _ = metrics.Register("instance.read", readInstanceCounter) + _ = metrics.Register("instance.write", writeInstanceCounter) + _ = writeBufferLatency.AddMany([]string{"wait", "write"}) writeBufferLatency.Start("wait") go initializeInstanceDao() @@ -168,7 +169,7 @@ func ExecDBWriteFunc(f func() error) error { } } m.ExecuteLatency = time.Since(m.Timestamp.Add(m.WaitLatency)) - backendWrites.Append(m) + _ = backendWrites.Append(m) <-instanceWriteChan // assume this takes no time }() res := f() @@ -203,7 +204,8 @@ func logReadTopologyInstanceError(instanceKey *InstanceKey, hint string, err err strings.Replace(hint, "%", "%%", -1), // escape % err) } - return log.Errorf(msg) + log.Errorf(msg) + return fmt.Errorf(msg) } // ReadTopologyInstance collects information on the state of a MySQL 
@@ -436,11 +438,11 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, primaryHostname := fullStatus.ReplicationStatus.SourceHost primaryKey, err := NewResolveInstanceKey(primaryHostname, int(fullStatus.ReplicationStatus.SourcePort)) if err != nil { - logReadTopologyInstanceError(instanceKey, "NewResolveInstanceKey", err) + _ = logReadTopologyInstanceError(instanceKey, "NewResolveInstanceKey", err) } primaryKey.Hostname, resolveErr = ResolveHostname(primaryKey.Hostname) if resolveErr != nil { - logReadTopologyInstanceError(instanceKey, fmt.Sprintf("ResolveHostname(%q)", primaryKey.Hostname), resolveErr) + _ = logReadTopologyInstanceError(instanceKey, fmt.Sprintf("ResolveHostname(%q)", primaryKey.Hostname), resolveErr) } instance.SourceKey = *primaryKey instance.IsDetachedPrimary = instance.SourceKey.IsDetached() @@ -480,7 +482,7 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, } } else { instance.ReplicationLagSeconds = instance.SecondsBehindPrimary - logReadTopologyInstanceError(instanceKey, "ReplicationLagQuery", err) + _ = logReadTopologyInstanceError(instanceKey, "ReplicationLagQuery", err) } }() } @@ -513,7 +515,7 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, return err }) - logReadTopologyInstanceError(instanceKey, "show slave hosts", err) + _ = logReadTopologyInstanceError(instanceKey, "show slave hosts", err) } if !foundByShowSlaveHosts { // Either not configured to read SHOW SLAVE HOSTS or nothing was there. @@ -532,7 +534,7 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, func(m sqlutils.RowMap) error { cname, resolveErr := ResolveHostname(m.GetString("slave_hostname")) if resolveErr != nil { - logReadTopologyInstanceError(instanceKey, "ResolveHostname: processlist", resolveErr) + _ = logReadTopologyInstanceError(instanceKey, "ResolveHostname: processlist", resolveErr) } replicaKey := InstanceKey{Hostname: cname, Port: instance.Key.Port} if !RegexpMatchPatterns(replicaKey.StringCode(), config.Config.DiscoveryIgnoreReplicaHostnameFilters) { @@ -541,7 +543,7 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, return err }) - logReadTopologyInstanceError(instanceKey, "processlist", err) + _ = logReadTopologyInstanceError(instanceKey, "processlist", err) }() } @@ -561,14 +563,14 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, func(m sqlutils.RowMap) error { cname, resolveErr := ResolveHostname(m.GetString("mysql_host")) if resolveErr != nil { - logReadTopologyInstanceError(instanceKey, "ResolveHostname: ndbinfo", resolveErr) + _ = logReadTopologyInstanceError(instanceKey, "ResolveHostname: ndbinfo", resolveErr) } replicaKey := InstanceKey{Hostname: cname, Port: instance.Key.Port} instance.AddReplicaKey(&replicaKey) return err }) - logReadTopologyInstanceError(instanceKey, "ndbinfo", err) + _ = logReadTopologyInstanceError(instanceKey, "ndbinfo", err) }() } @@ -578,7 +580,7 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, go func() { defer waitGroup.Done() err := db.QueryRow(config.Config.DetectDataCenterQuery).Scan(&instance.DataCenter) - logReadTopologyInstanceError(instanceKey, "DetectDataCenterQuery", err) + _ = logReadTopologyInstanceError(instanceKey, "DetectDataCenterQuery", err) }() } instance.DataCenter = tablet.Alias.Cell @@ -589,7 +591,7 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, go func() { 
defer waitGroup.Done() err := db.QueryRow(config.Config.DetectRegionQuery).Scan(&instance.Region) - logReadTopologyInstanceError(instanceKey, "DetectRegionQuery", err) + _ = logReadTopologyInstanceError(instanceKey, "DetectRegionQuery", err) }() } @@ -598,7 +600,7 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, go func() { defer waitGroup.Done() err := db.QueryRow(config.Config.DetectPhysicalEnvironmentQuery).Scan(&instance.PhysicalEnvironment) - logReadTopologyInstanceError(instanceKey, "DetectPhysicalEnvironmentQuery", err) + _ = logReadTopologyInstanceError(instanceKey, "DetectPhysicalEnvironmentQuery", err) }() } @@ -608,7 +610,7 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, go func() { defer waitGroup.Done() err := db.QueryRow(config.Config.DetectInstanceAliasQuery).Scan(&instance.InstanceAlias) - logReadTopologyInstanceError(instanceKey, "DetectInstanceAliasQuery", err) + _ = logReadTopologyInstanceError(instanceKey, "DetectInstanceAliasQuery", err) }() } instance.InstanceAlias = topoproto.TabletAliasString(tablet.Alias) @@ -619,7 +621,7 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, go func() { defer waitGroup.Done() err := db.QueryRow(config.Config.DetectSemiSyncEnforcedQuery).Scan(&instance.SemiSyncEnforced) - logReadTopologyInstanceError(instanceKey, "DetectSemiSyncEnforcedQuery", err) + _ = logReadTopologyInstanceError(instanceKey, "DetectSemiSyncEnforcedQuery", err) }() } @@ -627,7 +629,7 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, latency.Start("backend") err = ReadInstanceClusterAttributes(instance) latency.Stop("backend") - logReadTopologyInstanceError(instanceKey, "ReadInstanceClusterAttributes", err) + _ = logReadTopologyInstanceError(instanceKey, "ReadInstanceClusterAttributes", err) } // We need to update candidate_database_instance. @@ -635,7 +637,7 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, // to bump the last_suggested time. 
instance.PromotionRule = PromotionRule(durability, tablet) err = RegisterCandidateInstance(NewCandidateDatabaseInstance(instanceKey, instance.PromotionRule).WithCurrentTime()) - logReadTopologyInstanceError(instanceKey, "RegisterCandidateInstance", err) + _ = logReadTopologyInstanceError(instanceKey, "RegisterCandidateInstance", err) // TODO(sougou): delete cluster_alias_override metadata instance.SuggestedClusterAlias = fmt.Sprintf("%v:%v", tablet.Keyspace, tablet.Shard) @@ -645,13 +647,13 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, domainName := "" if err := db.QueryRow(config.Config.DetectClusterDomainQuery).Scan(&domainName); err != nil { domainName = "" - logReadTopologyInstanceError(instanceKey, "DetectClusterDomainQuery", err) + _ = logReadTopologyInstanceError(instanceKey, "DetectClusterDomainQuery", err) } if domainName != "" { latency.Start("backend") err := WriteClusterDomainName(instance.ClusterName, domainName) latency.Stop("backend") - logReadTopologyInstanceError(instanceKey, "WriteClusterDomainName", err) + _ = logReadTopologyInstanceError(instanceKey, "WriteClusterDomainName", err) } } @@ -703,7 +705,7 @@ Cleanup: redactedPrimaryExecutedGtidSet, _ := NewOracleGtidSet(instance.primaryExecutedGtidSet) redactedPrimaryExecutedGtidSet.RemoveUUID(instance.SourceUUID) - db.QueryRow("select gtid_subtract(?, ?)", redactedExecutedGtidSet.String(), redactedPrimaryExecutedGtidSet.String()).Scan(&instance.GtidErrant) + _ = db.QueryRow("select gtid_subtract(?, ?)", redactedExecutedGtidSet.String(), redactedPrimaryExecutedGtidSet.String()).Scan(&instance.GtidErrant) } } } @@ -720,7 +722,7 @@ Cleanup: if bufferWrites { enqueueInstanceWrite(instance, instanceFound, err) } else { - WriteInstance(instance, instanceFound, err) + _ = WriteInstance(instance, instanceFound, err) } lastAttemptedCheckTimer.Stop() latency.Stop("backend") @@ -818,7 +820,8 @@ func ReadInstanceClusterAttributes(instance *Instance) (err error) { return nil }) if err != nil { - return log.Errore(err) + log.Error(err) + return err } var replicationDepth uint @@ -897,24 +900,6 @@ func BulkReadInstance() ([](*InstanceKey), error) { return instanceKeys, nil } -func ReadInstancePromotionRule(instance *Instance) (err error) { - var promotionRule promotionrule.CandidatePromotionRule = promotionrule.Neutral - query := ` - select - ifnull(nullif(promotion_rule, ''), 'neutral') as promotion_rule - from candidate_database_instance - where hostname=? and port=? - ` - args := sqlutils.Args(instance.Key.Hostname, instance.Key.Port) - - err = db.QueryOrchestrator(query, args, func(m sqlutils.RowMap) error { - promotionRule = promotionrule.CandidatePromotionRule(m.GetString("promotion_rule")) - return nil - }) - instance.PromotionRule = promotionRule - return log.Errore(err) -} - // readInstanceRow reads a single instance row from the orchestrator backend database. 
func readInstanceRow(m sqlutils.RowMap) *Instance { instance := NewInstance() @@ -996,7 +981,7 @@ func readInstanceRow(m sqlutils.RowMap) *Instance { instance.InstanceAlias = m.GetString("instance_alias") instance.LastDiscoveryLatency = time.Duration(m.GetInt64("last_discovery_latency")) * time.Nanosecond - instance.Replicas.ReadJSON(replicasJSON) + _ = instance.Replicas.ReadJSON(replicasJSON) instance.applyFlavorName() /* Read Group Replication variables below */ @@ -1006,7 +991,7 @@ func readInstanceRow(m sqlutils.RowMap) *Instance { instance.ReplicationGroupMemberRole = m.GetString("replication_group_member_role") instance.ReplicationGroupPrimaryInstanceKey = InstanceKey{Hostname: m.GetString("replication_group_primary_host"), Port: m.GetInt("replication_group_primary_port")} - instance.ReplicationGroupMembers.ReadJSON(m.GetString("replication_group_members")) + _ = instance.ReplicationGroupMembers.ReadJSON(m.GetString("replication_group_members")) //instance.ReplicationGroup = m.GetString("replication_group_") // problems @@ -1070,11 +1055,13 @@ func readInstancesByCondition(condition string, args []any, sort string) ([](*In return nil }) if err != nil { - return instances, log.Errore(err) + log.Error(err) + return instances, err } err = PopulateInstancesAgents(instances) if err != nil { - return instances, log.Errore(err) + log.Error(err) + return instances, err } return instances, err } @@ -1110,7 +1097,9 @@ func ReadInstance(instanceKey *InstanceKey) (*Instance, bool, error) { // ReadClusterInstances reads all instances of a given cluster func ReadClusterInstances(clusterName string) ([](*Instance), error) { if strings.Contains(clusterName, "'") { - return [](*Instance){}, log.Errorf("Invalid cluster name: %s", clusterName) + errMsg := fmt.Sprintf("Invalid cluster name: %s", clusterName) + log.Errorf(errMsg) + return [](*Instance){}, fmt.Errorf(errMsg) } condition := `cluster_name = ?` return readInstancesByCondition(condition, sqlutils.Args(clusterName), "") @@ -1161,12 +1150,6 @@ func ReadWriteableClustersPrimaries() (instances [](*Instance), err error) { return instances, err } -// ReadClusterAliasInstances reads all instances of a cluster alias -func ReadClusterAliasInstances(clusterAlias string) ([](*Instance), error) { - condition := `suggested_cluster_alias = ? ` - return readInstancesByCondition(condition, sqlutils.Args(clusterAlias), "") -} - // ReadReplicaInstances reads replicas of a given primary func ReadReplicaInstances(primaryKey *InstanceKey) ([](*Instance), error) { condition := ` @@ -1324,26 +1307,6 @@ func ReadFuzzyInstanceKeyIfPossible(fuzzyInstanceKey *InstanceKey) *InstanceKey return fuzzyInstanceKey } -// ReadFuzzyInstance accepts a fuzzy instance key and expects to return a single instance. -// Multiple instances matching the fuzzy keys are not allowed. -func ReadFuzzyInstance(fuzzyInstanceKey *InstanceKey) (*Instance, error) { - if fuzzyInstanceKey == nil { - return nil, log.Errorf("ReadFuzzyInstance received nil input") - } - if fuzzyInstanceKey.IsIPv4() { - // avoid fuzziness. When looking for 10.0.0.1 we don't want to match 10.0.0.15! 
- instance, _, err := ReadInstance(fuzzyInstanceKey) - return instance, err - } - if fuzzyInstanceKey.Hostname != "" { - // Fuzzy instance search - if fuzzyInstances, _ := findFuzzyInstances(fuzzyInstanceKey); len(fuzzyInstances) == 1 { - return fuzzyInstances[0], nil - } - } - return nil, log.Errorf("Cannot determine fuzzy instance %+v", *fuzzyInstanceKey) -} - // ReadLostInRecoveryInstances returns all instances (potentially filtered by cluster) // which are currently indicated as downtimed due to being lost during a topology recovery. func ReadLostInRecoveryInstances(clusterName string) ([](*Instance), error) { @@ -1421,7 +1384,9 @@ func GetClusterOSCReplicas(clusterName string) ([](*Instance), error) { result := [](*Instance){} var err error if strings.Contains(clusterName, "'") { - return [](*Instance){}, log.Errorf("Invalid cluster name: %s", clusterName) + errMsg := fmt.Sprintf("Invalid cluster name: %s", clusterName) + log.Errorf(errMsg) + return [](*Instance){}, fmt.Errorf(errMsg) } { // Pick up to two busiest IMs @@ -1541,7 +1506,9 @@ func GetClusterGhostReplicas(clusterName string) (result [](*Instance), err erro // GetInstancesMaxLag returns the maximum lag in a set of instances func GetInstancesMaxLag(instances [](*Instance)) (maxLag int64, err error) { if len(instances) == 0 { - return 0, log.Errorf("No instances found in GetInstancesMaxLag") + errMsg := "No instances found in GetInstancesMaxLag" + log.Errorf(errMsg) + return 0, fmt.Errorf(errMsg) } for _, clusterInstance := range instances { if clusterInstance.ReplicationLagSeconds.Valid && clusterInstance.ReplicationLagSeconds.Int64 > maxLag { @@ -1620,37 +1587,10 @@ func updateInstanceClusterName(instance *Instance) error { `, instance.ClusterName, instance.Key.Hostname, instance.Key.Port, ) if err != nil { - return log.Errore(err) - } - AuditOperation("update-cluster-name", &instance.Key, fmt.Sprintf("set to %s", instance.ClusterName)) - return nil - } - return ExecDBWriteFunc(writeFunc) -} - -// ReplaceClusterName replaces all occurrences of oldClusterName with newClusterName -// It is called after a primary failover -func ReplaceClusterName(oldClusterName string, newClusterName string) error { - if oldClusterName == "" { - return log.Errorf("replaceClusterName: skipping empty oldClusterName") - } - if newClusterName == "" { - return log.Errorf("replaceClusterName: skipping empty newClusterName") - } - writeFunc := func() error { - _, err := db.ExecOrchestrator(` - update - database_instance - set - cluster_name=? - where - cluster_name=? 
- `, newClusterName, oldClusterName, - ) - if err != nil { - return log.Errore(err) + log.Error(err) + return err } - AuditOperation("replace-cluster-name", nil, fmt.Sprintf("replaxced %s with %s", oldClusterName, newClusterName)) + _ = AuditOperation("update-cluster-name", &instance.Key, fmt.Sprintf("set to %s", instance.ClusterName)) return nil } return ExecDBWriteFunc(writeFunc) @@ -1660,7 +1600,8 @@ func ReplaceClusterName(oldClusterName string, newClusterName string) error { func ReviewUnseenInstances() error { instances, err := ReadUnseenInstances() if err != nil { - return log.Errore(err) + log.Error(err) + return err } operations := 0 for _, instance := range instances { @@ -1668,21 +1609,21 @@ func ReviewUnseenInstances() error { primaryHostname, err := ResolveHostname(instance.SourceKey.Hostname) if err != nil { - log.Errore(err) + log.Error(err) continue } instance.SourceKey.Hostname = primaryHostname savedClusterName := instance.ClusterName if err := ReadInstanceClusterAttributes(instance); err != nil { - log.Errore(err) + log.Error(err) } else if instance.ClusterName != savedClusterName { - updateInstanceClusterName(instance) + _ = updateInstanceClusterName(instance) operations++ } } - AuditOperation("review-unseen-instances", nil, fmt.Sprintf("Operations: %d", operations)) + _ = AuditOperation("review-unseen-instances", nil, fmt.Sprintf("Operations: %d", operations)) return err } @@ -1717,7 +1658,8 @@ func readUnseenPrimaryKeys() ([]InstanceKey, error) { return nil }) if err != nil { - return res, log.Errore(err) + log.Error(err) + return res, err } return res, nil @@ -1733,8 +1675,8 @@ func InjectSeed(instanceKey *InstanceKey) error { instance := &Instance{Key: *instanceKey, Version: "Unknown", ClusterName: clusterName} instance.SetSeed() err := WriteInstance(instance, false, nil) - log.Debugf("InjectSeed: %+v, %+v", *instanceKey, err) - AuditOperation("inject-seed", instanceKey, "injected") + log.Infof("InjectSeed: %+v, %+v", *instanceKey, err) + _ = AuditOperation("inject-seed", instanceKey, "injected") return err } @@ -1753,11 +1695,11 @@ func InjectUnseenPrimaries() error { primaryKey := primaryKey if RegexpMatchPatterns(primaryKey.StringCode(), config.Config.DiscoveryIgnorePrimaryHostnameFilters) { - log.Debugf("InjectUnseenPrimaries: skipping discovery of %+v because it matches DiscoveryIgnorePrimaryHostnameFilters", primaryKey) + log.Infof("InjectUnseenPrimaries: skipping discovery of %+v because it matches DiscoveryIgnorePrimaryHostnameFilters", primaryKey) continue } if RegexpMatchPatterns(primaryKey.StringCode(), config.Config.DiscoveryIgnoreHostnameFilters) { - log.Debugf("InjectUnseenPrimaries: skipping discovery of %+v because it matches DiscoveryIgnoreHostnameFilters", primaryKey) + log.Infof("InjectUnseenPrimaries: skipping discovery of %+v because it matches DiscoveryIgnoreHostnameFilters", primaryKey) continue } @@ -1769,7 +1711,7 @@ func InjectUnseenPrimaries() error { } } - AuditOperation("inject-unseen-primaries", nil, fmt.Sprintf("Operations: %d", operations)) + _ = AuditOperation("inject-unseen-primaries", nil, fmt.Sprintf("Operations: %d", operations)) return err } @@ -1806,15 +1748,17 @@ func ForgetUnseenInstancesDifferentlyResolved() error { `, key.Hostname, key.Port, ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } rows, err := sqlResult.RowsAffected() if err != nil { - return log.Errore(err) + log.Error(err) + return err } rowsAffected = rowsAffected + rows } - AuditOperation("forget-unseen-differently-resolved", nil, 
fmt.Sprintf("Forgotten instances: %d", rowsAffected)) + _ = AuditOperation("forget-unseen-differently-resolved", nil, fmt.Sprintf("Forgotten instances: %d", rowsAffected)) return err } @@ -1843,7 +1787,8 @@ func readUnknownPrimaryHostnameResolves() (map[string]string, error) { return nil }) if err != nil { - return res, log.Errore(err) + log.Error(err) + return res, err } return res, nil @@ -1862,7 +1807,7 @@ func ResolveUnknownPrimaryHostnameResolves() error { UpdateResolvedHostname(hostname, resolvedHostname) } - AuditOperation("resolve-unknown-primaries", nil, fmt.Sprintf("Num resolved hostnames: %d", len(hostnameResolves))) + _ = AuditOperation("resolve-unknown-primaries", nil, fmt.Sprintf("Num resolved hostnames: %d", len(hostnameResolves))) return err } @@ -1890,7 +1835,7 @@ func ReadCountMySQLSnapshots(hostnames []string) (map[string]int, error) { }) if err != nil { - log.Errore(err) + log.Error(err) } return res, err } @@ -1939,8 +1884,8 @@ func GetClusterName(instanceKey *InstanceKey) (clusterName string, err error) { instanceKeyInformativeClusterName.Set(instanceKey.StringCode(), clusterName, cache.DefaultExpiration) return nil }) - - return clusterName, log.Errore(err) + log.Error(err) + return clusterName, err } // ReadClusters reads names of all known clusters @@ -2050,58 +1995,6 @@ func GetHeuristicClusterDomainInstanceAttribute(clusterName string) (instanceKey return ParseRawInstanceKey(writerInstanceName) } -// ReadAllInstanceKeys -func ReadAllInstanceKeys() ([]InstanceKey, error) { - res := []InstanceKey{} - query := ` - select - hostname, port - from - database_instance - ` - err := db.QueryOrchestrator(query, sqlutils.Args(), func(m sqlutils.RowMap) error { - instanceKey, merr := NewResolveInstanceKey(m.GetString("hostname"), m.GetInt("port")) - if merr != nil { - log.Errore(merr) - } else if !InstanceIsForgotten(instanceKey) { - // only if not in "forget" cache - res = append(res, *instanceKey) - } - return nil - }) - return res, log.Errore(err) -} - -// ReadAllInstanceKeysSourceKeys -func ReadAllMinimalInstances() ([]MinimalInstance, error) { - res := []MinimalInstance{} - query := ` - select - hostname, port, source_host, source_port, cluster_name - from - database_instance - ` - err := db.QueryOrchestrator(query, sqlutils.Args(), func(m sqlutils.RowMap) error { - minimalInstance := MinimalInstance{} - minimalInstance.Key = InstanceKey{ - Hostname: m.GetString("hostname"), - Port: m.GetInt("port"), - } - minimalInstance.PrimaryKey = InstanceKey{ - Hostname: m.GetString("source_host"), - Port: m.GetInt("source_port"), - } - minimalInstance.ClusterName = m.GetString("cluster_name") - - if !InstanceIsForgotten(&minimalInstance.Key) { - // only if not in "forget" cache - res = append(res, minimalInstance) - } - return nil - }) - return res, log.Errore(err) -} - // ReadOutdatedInstanceKeys reads and returns keys for all instances that are not up to date (i.e. // pre-configured time has passed since they were last checked) or the ones whose tablet information was read // but not the mysql information. 
This could happen if the durability policy of the keyspace wasn't @@ -2140,7 +2033,7 @@ func ReadOutdatedInstanceKeys() ([]InstanceKey, error) { err := db.QueryOrchestrator(query, args, func(m sqlutils.RowMap) error { instanceKey, merr := NewResolveInstanceKey(m.GetString("hostname"), m.GetInt("port")) if merr != nil { - log.Errore(merr) + log.Error(merr) } else if !InstanceIsForgotten(instanceKey) { // only if not in "forget" cache res = append(res, *instanceKey) @@ -2150,7 +2043,7 @@ func ReadOutdatedInstanceKeys() ([]InstanceKey, error) { }) if err != nil { - log.Errore(err) + log.Error(err) } return res, err @@ -2378,7 +2271,9 @@ func mkInsertOdkuForInstances(instances []*Instance, instanceWasActuallyFound bo sql, err := mkInsertOdku("database_instance", columns, values, len(instances), insertIgnore) if err != nil { - return sql, args, log.Errorf("Failed to build query: %v", err) + errMsg := fmt.Sprintf("Failed to build query: %v", err) + log.Errorf(errMsg) + return sql, args, fmt.Errorf(errMsg) } return sql, args, nil @@ -2464,7 +2359,7 @@ func flushInstanceWriteBuffer() { lastseen = append(lastseen, upd.instance) } else { instances = append(instances, upd.instance) - log.Debugf("flushInstanceWriteBuffer: will not update database_instance.last_seen due to error: %+v", upd.lastError) + log.Infof("flushInstanceWriteBuffer: will not update database_instance.last_seen due to error: %+v", upd.lastError) } } @@ -2477,11 +2372,15 @@ func flushInstanceWriteBuffer() { writeFunc := func() error { err := writeManyInstances(instances, true, false) if err != nil { - return log.Errorf("flushInstanceWriteBuffer writemany: %v", err) + errMsg := fmt.Sprintf("flushInstanceWriteBuffer writemany: %v", err) + log.Errorf(errMsg) + return fmt.Errorf(errMsg) } err = writeManyInstances(lastseen, true, true) if err != nil { - return log.Errorf("flushInstanceWriteBuffer last_seen: %v", err) + errMsg := fmt.Sprintf("flushInstanceWriteBuffer last_seen: %v", err) + log.Errorf(errMsg) + return fmt.Errorf(errMsg) } writeInstanceCounter.Inc(int64(len(instances) + len(lastseen))) @@ -2494,7 +2393,7 @@ func flushInstanceWriteBuffer() { writeBufferLatency.Stop("write") - writeBufferMetrics.Append(&WriteBufferMetric{ + _ = writeBufferMetrics.Append(&WriteBufferMetric{ Timestamp: time.Now(), WaitLatency: writeBufferLatency.Elapsed("wait"), WriteLatency: writeBufferLatency.Elapsed("write"), @@ -2505,7 +2404,7 @@ func flushInstanceWriteBuffer() { // WriteInstance stores an instance in the orchestrator backend func WriteInstance(instance *Instance, instanceWasActuallyFound bool, lastError error) error { if lastError != nil { - log.Debugf("writeInstance: will not update database_instance due to error: %+v", lastError) + log.Infof("writeInstance: will not update database_instance due to error: %+v", lastError) return nil } return writeManyInstances([]*Instance{instance}, instanceWasActuallyFound, true) @@ -2528,7 +2427,8 @@ func UpdateInstanceLastChecked(instanceKey *InstanceKey, partialSuccess bool) er instanceKey.Hostname, instanceKey.Port, ) - return log.Errore(err) + log.Error(err) + return err } return ExecDBWriteFunc(writeFunc) } @@ -2554,7 +2454,8 @@ func UpdateInstanceLastAttemptedCheck(instanceKey *InstanceKey) error { instanceKey.Hostname, instanceKey.Port, ) - return log.Errore(err) + log.Error(err) + return err } return ExecDBWriteFunc(writeFunc) } @@ -2568,7 +2469,9 @@ func InstanceIsForgotten(instanceKey *InstanceKey) bool { // It may be auto-rediscovered through topology or requested for discovery by multiple 
means. func ForgetInstance(instanceKey *InstanceKey) error { if instanceKey == nil { - return log.Errorf("ForgetInstance(): nil instanceKey") + errMsg := "ForgetInstance(): nil instanceKey" + log.Errorf(errMsg) + return fmt.Errorf(errMsg) } forgetInstanceKeys.Set(instanceKey.StringCode(), true, cache.DefaultExpiration) sqlResult, err := db.ExecOrchestrator(` @@ -2580,16 +2483,20 @@ func ForgetInstance(instanceKey *InstanceKey) error { instanceKey.Port, ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } rows, err := sqlResult.RowsAffected() if err != nil { - return log.Errore(err) + log.Error(err) + return err } if rows == 0 { - return log.Errorf("ForgetInstance(): instance %+v not found", *instanceKey) + errMsg := fmt.Sprintf("ForgetInstance(): instance %+v not found", *instanceKey) + log.Errorf(errMsg) + return fmt.Errorf(errMsg) } - AuditOperation("forget", instanceKey, "") + _ = AuditOperation("forget", instanceKey, "") return nil } @@ -2605,7 +2512,7 @@ func ForgetCluster(clusterName string) error { } for _, instance := range clusterInstances { forgetInstanceKeys.Set(instance.Key.StringCode(), true, cache.DefaultExpiration) - AuditOperation("forget", &instance.Key, "") + _ = AuditOperation("forget", &instance.Key, "") } _, err = db.ExecOrchestrator(` delete @@ -2627,13 +2534,15 @@ func ForgetLongUnseenInstances() error { config.Config.UnseenInstanceForgetHours, ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } rows, err := sqlResult.RowsAffected() if err != nil { - return log.Errore(err) + log.Error(err) + return err } - AuditOperation("forget-unseen", nil, fmt.Sprintf("Forgotten instances: %d", rows)) + _ = AuditOperation("forget-unseen", nil, fmt.Sprintf("Forgotten instances: %d", rows)) return err } @@ -2652,7 +2561,8 @@ func SnapshotTopologies() error { `, ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } return nil @@ -2688,12 +2598,13 @@ func ReadHistoryClusterInstances(clusterName string, historyTimestampPattern str return nil }) if err != nil { - return instances, log.Errore(err) + log.Error(err) + return instances, err } return instances, err } -// RecordInstanceCoordinatesHistory snapshots the binlog coordinates of instances +// RecordStaleInstanceBinlogCoordinates snapshots the binlog coordinates of instances func RecordStaleInstanceBinlogCoordinates(instanceKey *InstanceKey, binlogCoordinates *BinlogCoordinates) error { args := sqlutils.Args( instanceKey.Hostname, instanceKey.Port, @@ -2712,7 +2623,8 @@ func RecordStaleInstanceBinlogCoordinates(instanceKey *InstanceKey, binlogCoordi args..., ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } _, err = db.ExecOrchestrator(` insert ignore into @@ -2723,7 +2635,8 @@ func RecordStaleInstanceBinlogCoordinates(instanceKey *InstanceKey, binlogCoordi ?, ?, ?, ?, NOW() )`, args...) - return log.Errore(err) + log.Error(err) + return err } func ExpireStaleInstanceBinlogCoordinates() error { @@ -2737,42 +2650,12 @@ func ExpireStaleInstanceBinlogCoordinates() error { where first_seen < NOW() - INTERVAL ? 
SECOND `, expireSeconds, ) - return log.Errore(err) + log.Error(err) + return err } return ExecDBWriteFunc(writeFunc) } -// GetPreviousKnownRelayLogCoordinatesForInstance returns known relay log coordinates, that are not the -// exact current coordinates -func GetPreviousKnownRelayLogCoordinatesForInstance(instance *Instance) (relayLogCoordinates *BinlogCoordinates, err error) { - query := ` - select - relay_log_file, relay_log_pos - from - database_instance_coordinates_history - where - hostname = ? - and port = ? - and (relay_log_file, relay_log_pos) < (?, ?) - and relay_log_file != '' - and relay_log_pos != 0 - order by - recorded_timestamp desc - limit 1 - ` - err = db.QueryOrchestrator(query, sqlutils.Args( - instance.Key.Hostname, - instance.Key.Port, - instance.RelaylogCoordinates.LogFile, - instance.RelaylogCoordinates.LogPos, - ), func(m sqlutils.RowMap) error { - relayLogCoordinates = &BinlogCoordinates{LogFile: m.GetString("relay_log_file"), LogPos: m.GetInt64("relay_log_pos")} - - return nil - }) - return relayLogCoordinates, err -} - // ResetInstanceRelaylogCoordinatesHistory forgets about the history of an instance. This action is desirable // when relay logs become obsolete or irrelevant. Such is the case on `CHANGE MASTER TO`: servers gets compeltely // new relay logs. @@ -2785,7 +2668,8 @@ func ResetInstanceRelaylogCoordinatesHistory(instanceKey *InstanceKey) error { hostname=? and port=? `, instanceKey.Hostname, instanceKey.Port, ) - return log.Errore(err) + log.Error(err) + return err } return ExecDBWriteFunc(writeFunc) } @@ -2817,11 +2701,14 @@ func FigureClusterName(clusterHint string, instanceKey *InstanceKey, thisInstanc } instance, _, err := ReadInstance(instanceKey) if err != nil { - return true, clusterName, log.Errore(err) + log.Error(err) + return true, clusterName, err } if instance != nil { if instance.ClusterName == "" { - return true, clusterName, log.Errorf("Unable to determine cluster name for %+v, empty cluster name. clusterHint=%+v", instance.Key, clusterHint) + errMsg := fmt.Sprintf("Unable to determine cluster name for %+v, empty cluster name. clusterHint=%+v", instance.Key, clusterHint) + log.Errorf(errMsg) + return true, clusterName, fmt.Errorf(errMsg) } return true, instance.ClusterName, nil } @@ -2839,7 +2726,9 @@ func FigureClusterName(clusterHint string, instanceKey *InstanceKey, thisInstanc if hasResult, clusterName, err := clusterByInstanceKey(thisInstanceKey); hasResult { return clusterName, err } - return clusterName, log.Errorf("Unable to determine cluster name. clusterHint=%+v", clusterHint) + errMsg := fmt.Sprintf("Unable to determine cluster name. clusterHint=%+v", clusterHint) + log.Errorf(errMsg) + return clusterName, fmt.Errorf(errMsg) } // FigureInstanceKey tries to figure out a key @@ -2849,7 +2738,9 @@ func FigureInstanceKey(instanceKey *InstanceKey, thisInstanceKey *InstanceKey) ( } figuredKey := thisInstanceKey if figuredKey == nil { - return nil, log.Errorf("Cannot deduce instance %+v", instanceKey) + errMsg := fmt.Sprintf("Cannot deduce instance %+v", instanceKey) + log.Errorf(errMsg) + return nil, fmt.Errorf(errMsg) } return figuredKey, nil } @@ -2877,8 +2768,10 @@ func PopulateGroupReplicationInformation(instance *Instance, db *sql.DB) error { } // If we got here, the query failed but not because the server does not support group replication. 
Let's // log the error - return log.Error("There was an error trying to check group replication information for instance "+ + errMsg := fmt.Sprintf("There was an error trying to check group replication information for instance "+ "%+v: %+v", instance.Key, err) + log.Error(errMsg) + return fmt.Errorf(errMsg) } defer rows.Close() foundGroupPrimary := false @@ -2898,7 +2791,7 @@ func PopulateGroupReplicationInformation(instance *Instance, db *sql.DB) error { if err == nil { // ToDo: add support for multi primary groups. if !singlePrimaryGroup { - log.Debugf("This host seems to belong to a multi-primary replication group, which we don't " + + log.Infof("This host seems to belong to a multi-primary replication group, which we don't " + "support") break } @@ -2932,8 +2825,10 @@ func PopulateGroupReplicationInformation(instance *Instance, db *sql.DB) error { if !foundGroupPrimary { err = ReadReplicationGroupPrimary(instance) if err != nil { - return log.Errorf("Unable to find the group primary of instance %+v even though it seems to be "+ + errMsg := fmt.Sprintf("Unable to find the group primary of instance %+v even though it seems to be "+ "part of a replication group", instance.Key) + log.Errorf(errMsg) + return fmt.Errorf(errMsg) } } return nil diff --git a/go/vt/orchestrator/inst/instance_key_map_test.go b/go/vt/orchestrator/inst/instance_key_map_test.go index 62350fd8555..6aa6ef918a5 100644 --- a/go/vt/orchestrator/inst/instance_key_map_test.go +++ b/go/vt/orchestrator/inst/instance_key_map_test.go @@ -21,14 +21,12 @@ import ( "testing" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests" ) func init() { config.Config.HostnameResolveMethod = "none" config.MarkConfigurationLoaded() - log.SetLevel(log.ERROR) } func TestGetInstanceKeys(t *testing.T) { diff --git a/go/vt/orchestrator/inst/instance_key_test.go b/go/vt/orchestrator/inst/instance_key_test.go index d6290ac53e0..237f3f17d12 100644 --- a/go/vt/orchestrator/inst/instance_key_test.go +++ b/go/vt/orchestrator/inst/instance_key_test.go @@ -20,14 +20,12 @@ import ( "testing" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests" ) func init() { config.Config.HostnameResolveMethod = "none" config.MarkConfigurationLoaded() - log.SetLevel(log.ERROR) } var key1 = InstanceKey{Hostname: "host1", Port: 3306} diff --git a/go/vt/orchestrator/inst/instance_test.go b/go/vt/orchestrator/inst/instance_test.go index 7d61d5b7ead..4207ca2f05f 100644 --- a/go/vt/orchestrator/inst/instance_test.go +++ b/go/vt/orchestrator/inst/instance_test.go @@ -20,14 +20,12 @@ import ( "testing" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests" ) func init() { config.Config.HostnameResolveMethod = "none" config.MarkConfigurationLoaded() - log.SetLevel(log.ERROR) } var instance1 = Instance{Key: key1} diff --git a/go/vt/orchestrator/inst/instance_topology.go b/go/vt/orchestrator/inst/instance_topology.go index 9ebe8cdb033..a6b2de2fea1 100644 --- a/go/vt/orchestrator/inst/instance_topology.go +++ b/go/vt/orchestrator/inst/instance_topology.go @@ -25,8 +25,8 @@ import ( "sync" "time" + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" 
"vitess.io/vitess/go/vt/orchestrator/external/golib/math" "vitess.io/vitess/go/vt/orchestrator/external/golib/util" "vitess.io/vitess/go/vt/orchestrator/os" @@ -36,9 +36,8 @@ import ( type StopReplicationMethod string const ( - NoStopReplication StopReplicationMethod = "NoStopReplication" - StopReplicationNormal StopReplicationMethod = "StopReplicationNormal" - StopReplicationNice StopReplicationMethod = "StopReplicationNice" + NoStopReplication StopReplicationMethod = "NoStopReplication" + StopReplicationNice StopReplicationMethod = "StopReplicationNice" ) var ErrReplicationNotRunning = fmt.Errorf("Replication not running") @@ -109,7 +108,7 @@ func ASCIITopology(clusterName string, historyTimestampPattern string, tabulated instancesMap := make(map[InstanceKey](*Instance)) for _, instance := range instances { - log.Debugf("instanceKey: %+v", instance.Key) + log.Infof("instanceKey: %+v", instance.Key) instancesMap[instance.Key] = instance } @@ -230,7 +229,9 @@ func MoveUp(instanceKey *InstanceKey) (*Instance, error) { } primary, err := GetInstancePrimary(instance) if err != nil { - return instance, log.Errorf("Cannot GetInstancePrimary() for %+v. error=%+v", instance.Key, err) + errMsg := fmt.Sprintf("Cannot GetInstancePrimary() for %+v. error=%+v", instance.Key, err) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } if !primary.IsReplica() { @@ -291,10 +292,11 @@ Cleanup: primary, _ = StartReplication(&primary.Key) } if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } // and we're done (pending deferred functions) - AuditOperation("move-up", instanceKey, fmt.Sprintf("moved up %+v. Previous primary: %+v", *instanceKey, primary.Key)) + _ = AuditOperation("move-up", instanceKey, fmt.Sprintf("moved up %+v. Previous primary: %+v", *instanceKey, primary.Key)) return instance, err } @@ -317,7 +319,9 @@ func MoveUpReplicas(instanceKey *InstanceKey, pattern string) ([]*Instance, *Ins } _, err = GetInstancePrimary(instance) if err != nil { - return res, instance, errs, log.Errorf("Cannot GetInstancePrimary() for %+v. error=%+v", instance.Key, err) + errMsg := fmt.Sprintf("Cannot GetInstancePrimary() for %+v. error=%+v", instance.Key, err) + log.Error(errMsg) + return res, instance, errs, fmt.Errorf(errMsg) } if instance.IsBinlogServer() { @@ -417,13 +421,16 @@ func MoveUpReplicas(instanceKey *InstanceKey, pattern string) ([]*Instance, *Ins Cleanup: instance, _ = StartReplication(instanceKey) if err != nil { - return res, instance, errs, log.Errore(err) + log.Error(err) + return res, instance, errs, err } if len(errs) == len(replicas) { // All returned with error - return res, instance, errs, log.Error("Error on all operations") + errMsg := "Error on all operations" + log.Error(errMsg) + return res, instance, errs, fmt.Errorf(errMsg) } - AuditOperation("move-up-replicas", instanceKey, fmt.Sprintf("moved up %d/%d replicas of %+v. New primary: %+v", len(res), len(replicas), *instanceKey, instance.SourceKey)) + _ = AuditOperation("move-up-replicas", instanceKey, fmt.Sprintf("moved up %d/%d replicas of %+v. 
New primary: %+v", len(res), len(replicas), *instanceKey, instance.SourceKey)) return res, instance, errs, err } @@ -443,7 +450,9 @@ func MoveBelow(instanceKey, siblingKey *InstanceKey) (*Instance, error) { // Relocation of group secondaries makes no sense, group secondaries, by definition, always replicate from the group // primary if instance.IsReplicationGroupSecondary() { - return instance, log.Errorf("MoveBelow: %+v is a secondary replication group member, hence, it cannot be relocated", instance.Key) + errMsg := fmt.Sprintf("MoveBelow: %+v is a secondary replication group member, hence, it cannot be relocated", instance.Key) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } if sibling.IsBinlogServer() { @@ -515,10 +524,11 @@ Cleanup: _, _ = StartReplication(siblingKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } // and we're done (pending deferred functions) - AuditOperation("move-below", instanceKey, fmt.Sprintf("moved %+v below %+v", *instanceKey, *siblingKey)) + _ = AuditOperation("move-below", instanceKey, fmt.Sprintf("moved %+v below %+v", *instanceKey, *siblingKey)) return instance, err } @@ -593,10 +603,11 @@ func moveInstanceBelowViaGTID(instance, otherInstance *Instance) (*Instance, err Cleanup: instance, _ = StartReplication(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } // and we're done (pending deferred functions) - AuditOperation("move-below-gtid", instanceKey, fmt.Sprintf("moved %+v below %+v", *instanceKey, *otherInstanceKey)) + _ = AuditOperation("move-below-gtid", instanceKey, fmt.Sprintf("moved %+v below %+v", *instanceKey, *otherInstanceKey)) return instance, err } @@ -614,7 +625,9 @@ func MoveBelowGTID(instanceKey, otherKey *InstanceKey) (*Instance, error) { // Relocation of group secondaries makes no sense, group secondaries, by definition, always replicate from the group // primary if instance.IsReplicationGroupSecondary() { - return instance, log.Errorf("MoveBelowGTID: %+v is a secondary replication group member, hence, it cannot be relocated", instance.Key) + errMsg := fmt.Sprintf("MoveBelowGTID: %+v is a secondary replication group member, hence, it cannot be relocated", instance.Key) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } return moveInstanceBelowViaGTID(instance, other) } @@ -682,7 +695,7 @@ func MoveReplicasViaGTID(replicas []*Instance, other *Instance, postponedFunctio // All returned with error return movedReplicas, unmovedReplicas, errs, fmt.Errorf("MoveReplicasViaGTID: Error on all %+v operations", len(errs)) } - AuditOperation("move-replicas-gtid", &other.Key, fmt.Sprintf("moved %d/%d replicas below %+v via GTID", len(movedReplicas), len(replicas), other.Key)) + _ = AuditOperation("move-replicas-gtid", &other.Key, fmt.Sprintf("moved %d/%d replicas below %+v via GTID", len(movedReplicas), len(replicas), other.Key)) return movedReplicas, unmovedReplicas, errs, err } @@ -703,7 +716,7 @@ func MoveReplicasGTID(primaryKey *InstanceKey, belowKey *InstanceKey, pattern st replicas = filterInstancesByPattern(replicas, pattern) movedReplicas, unmovedReplicas, errs, err = MoveReplicasViaGTID(replicas, belowInstance, nil) if err != nil { - log.Errore(err) + log.Error(err) } if len(unmovedReplicas) > 0 { @@ -785,7 +798,8 @@ func Repoint(instanceKey *InstanceKey, primaryKey *InstanceKey, gtidHint Operati Cleanup: instance, _ = StartReplication(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + 
return instance, err } // and we're done (pending deferred functions) AuditOperation("repoint", instanceKey, fmt.Sprintf("replica %+v repointed to primary: %+v", *instanceKey, *primaryKey)) @@ -806,7 +820,9 @@ func RepointTo(replicas []*Instance, belowKey *InstanceKey) ([]*Instance, []erro return res, errs, nil } if belowKey == nil { - return res, errs, log.Errorf("RepointTo received nil belowKey") + errMsg := "RepointTo received nil belowKey" + log.Errorf(errMsg) + return res, errs, fmt.Errorf(errMsg) } log.Infof("Will repoint %+v replicas below %+v", len(replicas), *belowKey) @@ -840,9 +856,11 @@ func RepointTo(replicas []*Instance, belowKey *InstanceKey) ([]*Instance, []erro if len(errs) == len(replicas) { // All returned with error - return res, errs, log.Error("Error on all operations") + errMsg := "Error on all operations" + log.Error(errMsg) + return res, errs, fmt.Errorf(errMsg) } - AuditOperation("repoint-to", belowKey, fmt.Sprintf("repointed %d/%d replicas to %+v", len(res), len(replicas), *belowKey)) + _ = AuditOperation("repoint-to", belowKey, fmt.Sprintf("repointed %d/%d replicas to %+v", len(res), len(replicas), *belowKey)) return res, errs, nil } @@ -895,7 +913,7 @@ func MakeCoPrimary(instanceKey *InstanceKey) (*Instance, error) { if instance.IsReplicationGroupSecondary() { return instance, fmt.Errorf("MakeCoPrimary: %+v is a secondary replication group member, hence, it cannot be relocated", instance.Key) } - log.Debugf("Will check whether %+v's primary (%+v) can become its co-primary", instance.Key, primary.Key) + log.Infof("Will check whether %+v's primary (%+v) can become its co-primary", instance.Key, primary.Key) if canMove, merr := primary.CanMoveAsCoPrimary(); !canMove { return instance, merr } @@ -955,7 +973,7 @@ func MakeCoPrimary(instanceKey *InstanceKey) (*Instance, error) { } if instance.AllowTLS { - log.Debugf("Enabling SSL replication") + log.Infof("Enabling SSL replication") _, err = EnablePrimarySSL(&primary.Key) if err != nil { goto Cleanup @@ -973,10 +991,11 @@ func MakeCoPrimary(instanceKey *InstanceKey) (*Instance, error) { Cleanup: primary, _ = StartReplication(&primary.Key) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } // and we're done (pending deferred functions) - AuditOperation("make-co-primary", instanceKey, fmt.Sprintf("%+v made co-primary of %+v", *instanceKey, primary.Key)) + _ = AuditOperation("make-co-primary", instanceKey, fmt.Sprintf("%+v made co-primary of %+v", *instanceKey, primary.Key)) return instance, err } @@ -1013,11 +1032,12 @@ Cleanup: instance, _ = StartReplication(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } // and we're done (pending deferred functions) - AuditOperation("reset-replica", instanceKey, fmt.Sprintf("%+v replication reset", *instanceKey)) + _ = AuditOperation("reset-replica", instanceKey, fmt.Sprintf("%+v replication reset", *instanceKey)) return instance, err } @@ -1058,10 +1078,11 @@ func DetachReplicaPrimaryHost(instanceKey *InstanceKey) (*Instance, error) { Cleanup: instance, _ = StartReplication(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } // and we're done (pending deferred functions) - AuditOperation("repoint", instanceKey, fmt.Sprintf("replica %+v detached from primary into %+v", *instanceKey, *detachedPrimaryKey)) + _ = AuditOperation("repoint", instanceKey, fmt.Sprintf("replica %+v detached from primary into %+v", *instanceKey, 
*detachedPrimaryKey)) return instance, err } @@ -1100,12 +1121,13 @@ func ReattachReplicaPrimaryHost(instanceKey *InstanceKey) (*Instance, error) { goto Cleanup } // Just in case this instance used to be a primary: - ReplaceAliasClusterName(instanceKey.StringCode(), reattachedPrimaryKey.StringCode()) + _ = ReplaceAliasClusterName(instanceKey.StringCode(), reattachedPrimaryKey.StringCode()) Cleanup: instance, _ = StartReplication(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } // and we're done (pending deferred functions) AuditOperation("repoint", instanceKey, fmt.Sprintf("replica %+v reattached to primary %+v", *instanceKey, *reattachedPrimaryKey)) @@ -1133,7 +1155,7 @@ func EnableGTID(instanceKey *InstanceKey) (*Instance, error) { return instance, fmt.Errorf("Cannot enable GTID on %+v", *instanceKey) } - AuditOperation("enable-gtid", instanceKey, fmt.Sprintf("enabled GTID on %+v", *instanceKey)) + _ = AuditOperation("enable-gtid", instanceKey, fmt.Sprintf("enabled GTID on %+v", *instanceKey)) return instance, err } @@ -1158,7 +1180,7 @@ func DisableGTID(instanceKey *InstanceKey) (*Instance, error) { return instance, fmt.Errorf("Cannot disable GTID on %+v", *instanceKey) } - AuditOperation("disable-gtid", instanceKey, fmt.Sprintf("disabled GTID on %+v", *instanceKey)) + _ = AuditOperation("disable-gtid", instanceKey, fmt.Sprintf("disabled GTID on %+v", *instanceKey)) return instance, err } @@ -1170,7 +1192,9 @@ func LocateErrantGTID(instanceKey *InstanceKey) (errantBinlogs []string, err err } errantSearch := instance.GtidErrant if errantSearch == "" { - return errantBinlogs, log.Errorf("locate-errant-gtid: no errant-gtid on %+v", *instanceKey) + errMsg := fmt.Sprintf("locate-errant-gtid: no errant-gtid on %+v", *instanceKey) + log.Errorf(errMsg) + return errantBinlogs, fmt.Errorf(errMsg) } subtract, err := GTIDSubtract(instanceKey, errantSearch, instance.GtidPurged) if err != nil { @@ -1224,13 +1248,19 @@ func ErrantGTIDResetPrimary(instanceKey *InstanceKey) (instance *Instance, err e return instance, err } if instance.GtidErrant == "" { - return instance, log.Errorf("gtid-errant-reset-primary will not operate on %+v because no errant GTID is found", *instanceKey) + errMsg := fmt.Sprintf("gtid-errant-reset-primary will not operate on %+v because no errant GTID is found", *instanceKey) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } if !instance.SupportsOracleGTID { - return instance, log.Errorf("gtid-errant-reset-primary requested for %+v but it is not using oracle-gtid", *instanceKey) + errMsg := fmt.Sprintf("gtid-errant-reset-primary requested for %+v but it is not using oracle-gtid", *instanceKey) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } if len(instance.Replicas) > 0 { - return instance, log.Errorf("gtid-errant-reset-primary will not operate on %+v because it has %+v replicas. Expecting no replicas", *instanceKey, len(instance.Replicas)) + errMsg := fmt.Sprintf("gtid-errant-reset-primary will not operate on %+v because it has %+v replicas. 
Expecting no replicas", *instanceKey, len(instance.Replicas)) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } gtidSubtract := "" @@ -1311,10 +1341,11 @@ func ErrantGTIDResetPrimary(instanceKey *InstanceKey) (instance *Instance, err e Cleanup: var startReplicationErr error instance, startReplicationErr = StartReplication(instanceKey) - log.Errore(startReplicationErr) + log.Error(startReplicationErr) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } // and we're done (pending deferred functions) @@ -1331,10 +1362,14 @@ func ErrantGTIDInjectEmpty(instanceKey *InstanceKey) (instance *Instance, cluste return instance, clusterPrimary, countInjectedTransactions, err } if instance.GtidErrant == "" { - return instance, clusterPrimary, countInjectedTransactions, log.Errorf("gtid-errant-inject-empty will not operate on %+v because no errant GTID is found", *instanceKey) + errMsg := fmt.Sprintf("gtid-errant-inject-empty will not operate on %+v because no errant GTID is found", *instanceKey) + log.Errorf(errMsg) + return instance, clusterPrimary, countInjectedTransactions, fmt.Errorf(errMsg) } if !instance.SupportsOracleGTID { - return instance, clusterPrimary, countInjectedTransactions, log.Errorf("gtid-errant-inject-empty requested for %+v but it does not support oracle-gtid", *instanceKey) + errMsg := fmt.Sprintf("gtid-errant-inject-empty requested for %+v but it does not support oracle-gtid", *instanceKey) + log.Errorf(errMsg) + return instance, clusterPrimary, countInjectedTransactions, fmt.Errorf(errMsg) } primaries, err := ReadClusterWriteablePrimary(instance.ClusterName) @@ -1342,12 +1377,16 @@ func ErrantGTIDInjectEmpty(instanceKey *InstanceKey) (instance *Instance, cluste return instance, clusterPrimary, countInjectedTransactions, err } if len(primaries) == 0 { - return instance, clusterPrimary, countInjectedTransactions, log.Errorf("gtid-errant-inject-empty found no writabel primary for %+v cluster", instance.ClusterName) + errMsg := fmt.Sprintf("gtid-errant-inject-empty found no writable primary for %+v cluster", instance.ClusterName) + log.Errorf(errMsg) + return instance, clusterPrimary, countInjectedTransactions, fmt.Errorf(errMsg) } clusterPrimary = primaries[0] if !clusterPrimary.SupportsOracleGTID { - return instance, clusterPrimary, countInjectedTransactions, log.Errorf("gtid-errant-inject-empty requested for %+v but the cluster's primary %+v does not support oracle-gtid", *instanceKey, clusterPrimary.Key) + errMsg := fmt.Sprintf("gtid-errant-inject-empty requested for %+v but the cluster's primary %+v does not support oracle-gtid", *instanceKey, clusterPrimary.Key) + log.Errorf(errMsg) + return instance, clusterPrimary, countInjectedTransactions, fmt.Errorf(errMsg) } gtidSet, err := NewOracleGtidSet(instance.GtidErrant) @@ -1378,7 +1417,9 @@ func TakeSiblings(instanceKey *InstanceKey) (instance *Instance, takenSiblings i return instance, 0, err } if !instance.IsReplica() { - return instance, takenSiblings, log.Errorf("take-siblings: instance %+v is not a replica.", *instanceKey) + errMsg := fmt.Sprintf("take-siblings: instance %+v is not a replica.", *instanceKey) + log.Errorf(errMsg) + return instance, takenSiblings, fmt.Errorf(errMsg) } relocatedReplicas, _, _, err := RelocateReplicas(&instance.SourceKey, instanceKey, "") @@ -1406,7 +1447,7 @@ func TakePrimaryHook(successor *Instance, demoted *Instance) { processCount := len(config.Config.PostTakePrimaryProcesses) for i, command := range config.Config.PostTakePrimaryProcesses
{ fullDescription := fmt.Sprintf("PostTakePrimaryProcesses hook %d of %d", i+1, processCount) - log.Debugf("Take-Primary: PostTakePrimaryProcesses: Calling %+s", fullDescription) + log.Infof("Take-Primary: PostTakePrimaryProcesses: Calling %+s", fullDescription) start := time.Now() if err := os.CommandRun(command, env, successorStr, demotedStr); err == nil { info := fmt.Sprintf("Completed %s in %v", fullDescription, time.Since(start)) @@ -1441,7 +1482,7 @@ func TakePrimary(instanceKey *InstanceKey, allowTakingCoPrimary bool) (*Instance if primaryInstance.IsCoPrimary && !allowTakingCoPrimary { return instance, fmt.Errorf("%+v is co-primary. Cannot take it", primaryInstance.Key) } - log.Debugf("TakePrimary: will attempt making %+v take its primary %+v, now resolved as %+v", *instanceKey, instance.SourceKey, primaryInstance.Key) + log.Infof("TakePrimary: will attempt making %+v take its primary %+v, now resolved as %+v", *instanceKey, instance.SourceKey, primaryInstance.Key) if canReplicate, err := primaryInstance.CanReplicateFrom(instance); !canReplicate { return instance, err @@ -1541,25 +1582,12 @@ func sortedReplicasDataCenterHint(replicas [](*Instance), stopReplicationMethod SortInstancesDataCenterHint(replicas, dataCenterHint) for _, replica := range replicas { - log.Debugf("- sorted replica: %+v %+v", replica.Key, replica.ExecBinlogCoordinates) + log.Infof("- sorted replica: %+v %+v", replica.Key, replica.ExecBinlogCoordinates) } return replicas } -// GetSortedReplicas reads list of replicas of a given primary, and returns them sorted by exec coordinates -// (most up-to-date replica first). -func GetSortedReplicas(primaryKey *InstanceKey, stopReplicationMethod StopReplicationMethod) (replicas [](*Instance), err error) { - if replicas, err = getReplicasForSorting(primaryKey, false); err != nil { - return replicas, err - } - replicas = sortedReplicas(replicas, stopReplicationMethod) - if len(replicas) == 0 { - return replicas, fmt.Errorf("No replicas found for %+v", *primaryKey) - } - return replicas, err -} - func isGenerallyValidAsBinlogSource(replica *Instance) bool { if !replica.IsLastCheckValid { // something wrong with this replica right now. We shouldn't hope to be able to promote it @@ -1588,30 +1616,9 @@ func isGenerallyValidAsCandidateReplica(replica *Instance) bool { return true } -// isValidAsCandidatePrimaryInBinlogServerTopology let's us know whether a given replica is generally -// valid to promote to be primary. -func isValidAsCandidatePrimaryInBinlogServerTopology(replica *Instance) bool { - if !replica.IsLastCheckValid { - // something wrong with this replica right now. We shouldn't hope to be able to promote it - return false - } - if !replica.LogBinEnabled { - return false - } - if replica.LogReplicationUpdatesEnabled { - // That's right: we *disallow* log-replica-updates - return false - } - if replica.IsBinlogServer() { - return false - } - - return true -} - func IsBannedFromBeingCandidateReplica(replica *Instance) bool { if replica.PromotionRule == promotionrule.MustNot { - log.Debugf("instance %+v is banned because of promotion rule", replica.Key) + log.Infof("instance %+v is banned because of promotion rule", replica.Key) return true } for _, filter := range config.Config.PromotionIgnoreHostnameFilters { @@ -1626,7 +1633,9 @@ func IsBannedFromBeingCandidateReplica(replica *Instance) bool { // among given instances. This will be used for choosing best candidate for promotion. 
func getPriorityMajorVersionForCandidate(replicas [](*Instance)) (priorityMajorVersion string, err error) { if len(replicas) == 0 { - return "", log.Errorf("empty replicas list in getPriorityMajorVersionForCandidate") + errMsg := "empty replicas list in getPriorityMajorVersionForCandidate" + log.Errorf(errMsg) + return "", fmt.Errorf(errMsg) } majorVersionsCount := make(map[string]int) for _, replica := range replicas { @@ -1645,7 +1654,9 @@ func getPriorityMajorVersionForCandidate(replicas [](*Instance)) (priorityMajorV // among given instances. This will be used for choosing best candidate for promotion. func getPriorityBinlogFormatForCandidate(replicas [](*Instance)) (priorityBinlogFormat string, err error) { if len(replicas) == 0 { - return "", log.Errorf("empty replicas list in getPriorityBinlogFormatForCandidate") + errMsg := "empty replicas list in getPriorityBinlogFormatForCandidate" + log.Errorf(errMsg) + return "", fmt.Errorf(errMsg) } binlogFormatsCount := make(map[string]int) for _, replica := range replicas { @@ -1753,38 +1764,10 @@ func GetCandidateReplica(primaryKey *InstanceKey, forRematchPurposes bool) (*Ins log.Warningf("GetCandidateReplica: chosen replica: %+v is behind most-up-to-date replica: %+v", candidateReplica.Key, mostUpToDateReplica.Key) } } - log.Debugf("GetCandidateReplica: candidate: %+v, ahead: %d, equal: %d, late: %d, break: %d", candidateReplica.Key, len(aheadReplicas), len(equalReplicas), len(laterReplicas), len(cannotReplicateReplicas)) + log.Infof("GetCandidateReplica: candidate: %+v, ahead: %d, equal: %d, late: %d, break: %d", candidateReplica.Key, len(aheadReplicas), len(equalReplicas), len(laterReplicas), len(cannotReplicateReplicas)) return candidateReplica, aheadReplicas, equalReplicas, laterReplicas, cannotReplicateReplicas, nil } -// GetCandidateReplicaOfBinlogServerTopology chooses the best replica to promote given a (possibly dead) primary -func GetCandidateReplicaOfBinlogServerTopology(primaryKey *InstanceKey) (candidateReplica *Instance, err error) { - replicas, err := getReplicasForSorting(primaryKey, true) - if err != nil { - return candidateReplica, err - } - replicas = sortedReplicas(replicas, NoStopReplication) - if len(replicas) == 0 { - return candidateReplica, fmt.Errorf("No replicas found for %+v", *primaryKey) - } - for _, replica := range replicas { - replica := replica - if candidateReplica != nil { - break - } - if isValidAsCandidatePrimaryInBinlogServerTopology(replica) && !IsBannedFromBeingCandidateReplica(replica) { - // this is the one - candidateReplica = replica - } - } - if candidateReplica != nil { - log.Debugf("GetCandidateReplicaOfBinlogServerTopology: returning %+v as candidate replica for %+v", candidateReplica.Key, *primaryKey) - } else { - log.Debugf("GetCandidateReplicaOfBinlogServerTopology: no candidate replica found for %+v", *primaryKey) - } - return candidateReplica, err -} - func getMostUpToDateActiveBinlogServer(primaryKey *InstanceKey) (mostAdvancedBinlogServer *Instance, binlogServerReplicas [](*Instance), err error) { if binlogServerReplicas, err = ReadBinlogServerReplicaInstances(primaryKey); err == nil && len(binlogServerReplicas) > 0 { // Pick the most advanced binlog sever that is good to go @@ -1844,11 +1827,12 @@ func RegroupReplicasGTID( } moveGTIDFunc := func() error { - log.Debugf("RegroupReplicasGTID: working on %d replicas", len(replicasToMove)) + log.Infof("RegroupReplicasGTID: working on %d replicas", len(replicasToMove)) movedReplicas, unmovedReplicas, _, err = 
MoveReplicasViaGTID(replicasToMove, candidateReplica, postponedFunctionsContainer) unmovedReplicas = append(unmovedReplicas, aheadReplicas...) - return log.Errore(err) + log.Error(err) + return err } if postponedFunctionsContainer != nil && postponeAllMatchOperations != nil && postponeAllMatchOperations(candidateReplica, hasBestPromotionRule) { postponedFunctionsContainer.AddPostponedFunction(moveGTIDFunc, fmt.Sprintf("regroup-replicas-gtid %+v", candidateReplica.Key)) @@ -1858,7 +1842,7 @@ func RegroupReplicasGTID( StartReplication(&candidateReplica.Key) - log.Debugf("RegroupReplicasGTID: done") + log.Infof("RegroupReplicasGTID: done") AuditOperation("regroup-replicas-gtid", primaryKey, fmt.Sprintf("regrouped replicas of %+v via GTID; promoted %+v", *primaryKey, candidateReplica.Key)) return unmovedReplicas, movedReplicas, cannotReplicateReplicas, candidateReplica, err } @@ -1926,16 +1910,18 @@ func RegroupReplicas(primaryKey *InstanceKey, returnReplicaEvenOnFailureToRegrou } } if allGTID { - log.Debugf("RegroupReplicas: using GTID to regroup replicas of %+v", *primaryKey) + log.Infof("RegroupReplicas: using GTID to regroup replicas of %+v", *primaryKey) unmovedReplicas, movedReplicas, cannotReplicateReplicas, candidateReplica, err := RegroupReplicasGTID(primaryKey, returnReplicaEvenOnFailureToRegroup, onCandidateReplicaChosen, nil, nil) return unmovedReplicas, emptyReplicas, movedReplicas, cannotReplicateReplicas, candidateReplica, err } if allBinlogServers { - log.Debugf("RegroupReplicas: using binlog servers to regroup replicas of %+v", *primaryKey) + log.Infof("RegroupReplicas: using binlog servers to regroup replicas of %+v", *primaryKey) movedReplicas, candidateReplica, err := RegroupReplicasBinlogServers(primaryKey, returnReplicaEvenOnFailureToRegroup) return emptyReplicas, emptyReplicas, movedReplicas, cannotReplicateReplicas, candidateReplica, err } - return emptyReplicas, emptyReplicas, emptyReplicas, emptyReplicas, instance, log.Errorf("No solution path found for RegroupReplicas") + errMsg := "No solution path found for RegroupReplicas" + log.Errorf(errMsg) + return emptyReplicas, emptyReplicas, emptyReplicas, emptyReplicas, instance, fmt.Errorf(errMsg) } // relocateBelowInternal is a protentially recursive function which chooses how to relocate an instance below another. @@ -1943,7 +1929,9 @@ func RegroupReplicas(primaryKey *InstanceKey, returnReplicaEvenOnFailureToRegrou // or it may combine any of the above in a multi-step operation. func relocateBelowInternal(instance, other *Instance) (*Instance, error) { if canReplicate, err := instance.CanReplicateFrom(other); !canReplicate { - return instance, log.Errorf("%+v cannot replicate from %+v. Reason: %+v", instance.Key, other.Key, err) + errMsg := fmt.Sprintf("%+v cannot replicate from %+v. Reason: %+v", instance.Key, other.Key, err) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } // simplest: if InstanceIsPrimaryOf(other, instance) { @@ -1975,13 +1963,17 @@ func relocateBelowInternal(instance, other *Instance) (*Instance, error) { return instance, err } if !found { - return instance, log.Errorf("Cannot find primary %+v", other.SourceKey) + errMsg := fmt.Sprintf("Cannot find primary %+v", other.SourceKey) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } if !other.IsLastCheckValid { - return instance, log.Errorf("Binlog server %+v is not reachable. 
It would take two steps to relocate %+v below it, and I won't even do the first step.", other.Key, instance.Key) + errMsg := fmt.Sprintf("Binlog server %+v is not reachable. It would take two steps to relocate %+v below it, and I won't even do the first step.", other.Key, instance.Key) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } - log.Debugf("Relocating to a binlog server; will first attempt to relocate to the binlog server's primary: %+v, and then repoint down", otherPrimary.Key) + log.Infof("Relocating to a binlog server; will first attempt to relocate to the binlog server's primary: %+v, and then repoint down", otherPrimary.Key) if _, err := relocateBelowInternal(instance, otherPrimary); err != nil { return instance, err } @@ -1991,7 +1983,9 @@ func relocateBelowInternal(instance, other *Instance) (*Instance, error) { // Can only move within the binlog-server family tree // And these have been covered just now: move up from a primary binlog server, move below a binling binlog server. // sure, the family can be more complex, but we keep these operations atomic - return nil, log.Errorf("Relocating binlog server %+v below %+v turns to be too complex; please do it manually", instance.Key, other.Key) + errMsg := fmt.Sprintf("Relocating binlog server %+v below %+v turns to be too complex; please do it manually", instance.Key, other.Key) + log.Errorf(errMsg) + return nil, fmt.Errorf(errMsg) } // Next, try GTID if _, _, gtidCompatible := instancesAreGTIDAndCompatible(instance, other); gtidCompatible { @@ -2018,7 +2012,9 @@ func relocateBelowInternal(instance, other *Instance) (*Instance, error) { return relocateBelowInternal(instance, other) } // Too complex - return nil, log.Errorf("Relocating %+v below %+v turns to be too complex; please do it manually", instance.Key, other.Key) + errMsg := fmt.Sprintf("Relocating %+v below %+v turns to be too complex; please do it manually", instance.Key, other.Key) + log.Errorf(errMsg) + return nil, fmt.Errorf(errMsg) } // RelocateBelow will attempt moving instance indicated by instanceKey below another instance. 
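Illustrative sketch, not part of the diff: nearly every hunk in these files applies the same mechanical rewrite. The removed golib logger's Errore/Errorf returned the error they logged, which allowed one-liners such as return log.Errore(err); the vt/log functions return nothing, so each call site now logs and returns in two explicit steps, building the formatted message once so the logged text and the returned error stay identical. The snippet below shows that shape in isolation; InstanceKey and readSomething here are trimmed-down stand-ins invented for the sketch, not orchestrator code.

package example

import (
	"fmt"

	"vitess.io/vitess/go/vt/log"
)

// InstanceKey and readSomething are minimal stand-ins so the sketch compiles on its own.
type InstanceKey struct {
	Hostname string
	Port     int
}

func readSomething(key *InstanceKey) error { return nil }

// demoPattern mirrors the rewritten call sites: log first, then return the error.
func demoPattern(key *InstanceKey) error {
	if err := readSomething(key); err != nil {
		// before: return log.Errore(err)
		log.Error(err)
		return err
	}
	if key.Hostname == "" {
		// before: return log.Errorf("empty hostname in %+v", *key)
		errMsg := fmt.Sprintf("empty hostname in %+v", *key)
		log.Errorf(errMsg)
		return fmt.Errorf(errMsg)
	}
	return nil
}

One reviewing note on the repeated pattern: passing the prebuilt message straight to log.Errorf and fmt.Errorf means any literal % inside it is re-interpreted as a format verb; fmt.Errorf("%s", errMsg) avoids that.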
@@ -2027,23 +2023,33 @@ func relocateBelowInternal(instance, other *Instance) (*Instance, error) { func RelocateBelow(instanceKey, otherKey *InstanceKey) (*Instance, error) { instance, found, err := ReadInstance(instanceKey) if err != nil || !found { - return instance, log.Errorf("Error reading %+v", *instanceKey) + errMsg := fmt.Sprintf("Error reading %+v", *instanceKey) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } // Relocation of group secondaries makes no sense, group secondaries, by definition, always replicate from the group // primary if instance.IsReplicationGroupSecondary() { - return instance, log.Errorf("relocate: %+v is a secondary replication group member, hence, it cannot be relocated", instance.Key) + errMsg := fmt.Sprintf("relocate: %+v is a secondary replication group member, hence, it cannot be relocated", instance.Key) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } other, found, err := ReadInstance(otherKey) if err != nil || !found { - return instance, log.Errorf("Error reading %+v", *otherKey) + errMsg := fmt.Sprintf("Error reading %+v", *otherKey) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } // Disallow setting up a group primary to replicate from a group secondary if instance.IsReplicationGroupPrimary() && other.ReplicationGroupName == instance.ReplicationGroupName { - return instance, log.Errorf("relocate: Setting a group primary to replicate from another member of its group is disallowed") + errMsg := "relocate: Setting a group primary to replicate from another member of its group is disallowed" + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } if other.IsDescendantOf(instance) { - return instance, log.Errorf("relocate: %+v is a descendant of %+v", *otherKey, instance.Key) + errMsg := fmt.Sprintf("relocate: %+v is a descendant of %+v", *otherKey, instance.Key) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } instance, err = relocateBelowInternal(instance, other) if err == nil { @@ -2104,7 +2110,9 @@ func relocateReplicasInternal(replicas []*Instance, instance, other *Instance) ( } // Too complex - return nil, errs, log.Errorf("Relocating %+v replicas of %+v below %+v turns to be too complex; please do it manually", len(replicas), instance.Key, other.Key) + errMsg := fmt.Sprintf("Relocating %+v replicas of %+v below %+v turns to be too complex; please do it manually", len(replicas), instance.Key, other.Key) + log.Errorf(errMsg) + return nil, errs, fmt.Errorf(errMsg) } // RelocateReplicas will attempt moving replicas of an instance indicated by instanceKey below another instance. 
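The same hunks also make the discarded return value of AuditOperation explicit, replacing bare AuditOperation(...) calls with _ = AuditOperation(...) so the intentional ignore is visible to errcheck-style linters. Purely as an illustration of a possible follow-up (logReturnf is hypothetical and not introduced by this patch), the errMsg/log/return triple repeated above could be collapsed into one helper over the same vt/log API:

package example

import (
	"fmt"

	"vitess.io/vitess/go/vt/log"
)

// logReturnf is a hypothetical helper, not part of this diff: it logs a formatted
// message through vt/log and returns the same text as an error, replacing the
// three-line errMsg/log.Errorf/fmt.Errorf sequence used at each call site.
// It logs with log.Error and wraps with "%s" so a literal % in the message is
// never re-interpreted as a format verb.
func logReturnf(format string, args ...any) error {
	errMsg := fmt.Sprintf(format, args...)
	log.Error(errMsg)
	return fmt.Errorf("%s", errMsg)
}

// A call site such as RelocateReplicas could then read, for example:
//	return replicas, other, errs, logReturnf("Error reading %+v", *instanceKey)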
@@ -2114,11 +2122,15 @@ func RelocateReplicas(instanceKey, otherKey *InstanceKey, pattern string) (repli instance, found, err := ReadInstance(instanceKey) if err != nil || !found { - return replicas, other, errs, log.Errorf("Error reading %+v", *instanceKey) + errMsg := fmt.Sprintf("Error reading %+v", *instanceKey) + log.Errorf(errMsg) + return replicas, other, errs, fmt.Errorf(errMsg) } other, found, err = ReadInstance(otherKey) if err != nil || !found { - return replicas, other, errs, log.Errorf("Error reading %+v", *otherKey) + errMsg := fmt.Sprintf("Error reading %+v", *otherKey) + log.Errorf(errMsg) + return replicas, other, errs, fmt.Errorf(errMsg) } replicas, err = ReadReplicaInstances(instanceKey) @@ -2133,7 +2145,9 @@ func RelocateReplicas(instanceKey, otherKey *InstanceKey, pattern string) (repli } for _, replica := range replicas { if other.IsDescendantOf(replica) { - return replicas, other, errs, log.Errorf("relocate-replicas: %+v is a descendant of %+v", *otherKey, replica.Key) + errMsg := fmt.Sprintf("relocate-replicas: %+v is a descendant of %+v", *otherKey, replica.Key) + log.Errorf(errMsg) + return replicas, other, errs, fmt.Errorf(errMsg) } } replicas, errs, err = relocateReplicasInternal(replicas, instance, other) @@ -2154,7 +2168,9 @@ func PurgeBinaryLogsTo(instanceKey *InstanceKey, logFile string, force bool) (*I purgeCoordinates := &BinlogCoordinates{LogFile: logFile, LogPos: 0} for _, replica := range replicas { if !purgeCoordinates.SmallerThan(&replica.ExecBinlogCoordinates) { - return nil, log.Errorf("Unsafe to purge binary logs on %+v up to %s because replica %+v has only applied up to %+v", *instanceKey, logFile, replica.Key, replica.ExecBinlogCoordinates) + errMsg := fmt.Sprintf("Unsafe to purge binary logs on %+v up to %s because replica %+v has only applied up to %+v", *instanceKey, logFile, replica.Key, replica.ExecBinlogCoordinates) + log.Errorf(errMsg) + return nil, fmt.Errorf(errMsg) } } } @@ -2165,7 +2181,8 @@ func PurgeBinaryLogsTo(instanceKey *InstanceKey, logFile string, force bool) (*I func PurgeBinaryLogsToLatest(instanceKey *InstanceKey, force bool) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } return PurgeBinaryLogsTo(instanceKey, instance.SelfBinlogCoordinates.LogFile, force) } diff --git a/go/vt/orchestrator/inst/instance_topology_dao.go b/go/vt/orchestrator/inst/instance_topology_dao.go index a4cf5cdae17..8baea6bca00 100644 --- a/go/vt/orchestrator/inst/instance_topology_dao.go +++ b/go/vt/orchestrator/inst/instance_topology_dao.go @@ -23,9 +23,9 @@ import ( "strings" "time" + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) @@ -63,16 +63,6 @@ func ExecuteOnTopology(f func()) { f() } -// ScanInstanceRow executes a read-a-single-row query on a given MySQL topology instance -func ScanInstanceRow(instanceKey *InstanceKey, query string, dest ...any) error { - db, err := db.OpenTopology(instanceKey.Hostname, instanceKey.Port) - if err != nil { - return err - } - err = db.QueryRow(query).Scan(dest...) 
- return err -} - // EmptyCommitInstance issues an empty COMMIT on a given instance func EmptyCommitInstance(instanceKey *InstanceKey) error { db, err := db.OpenTopology(instanceKey.Hostname, instanceKey.Port) @@ -105,27 +95,6 @@ func RefreshTopologyInstance(instanceKey *InstanceKey) (*Instance, error) { return inst, nil } -// RefreshTopologyInstances will do a blocking (though concurrent) refresh of all given instances -func RefreshTopologyInstances(instances [](*Instance)) { - // use concurrency but wait for all to complete - barrier := make(chan InstanceKey) - for _, instance := range instances { - instance := instance - go func() { - // Signal completed replica - defer func() { barrier <- instance.Key }() - // Wait your turn to read a replica - ExecuteOnTopology(func() { - log.Debugf("... reading instance: %+v", instance.Key) - ReadTopologyInstance(&instance.Key) - }) - }() - } - for range instances { - <-barrier - } -} - // GetReplicationRestartPreserveStatements returns a sequence of statements that make sure a replica is stopped // and then returned to the same state. For example, if the replica was fully running, this will issue // a STOP on both io_thread and sql_thread, followed by START on both. If one of them is not running @@ -165,12 +134,13 @@ func FlushBinaryLogs(instanceKey *InstanceKey, count int) (*Instance, error) { for i := 0; i < count; i++ { _, err := ExecInstance(instanceKey, `flush binary logs`) if err != nil { - return nil, log.Errore(err) + log.Error(err) + return nil, err } } log.Infof("flush-binary-logs count=%+v on %+v", count, *instanceKey) - AuditOperation("flush-binary-logs", instanceKey, "success") + _ = AuditOperation("flush-binary-logs", instanceKey, "success") return ReadTopologyInstance(instanceKey) } @@ -179,12 +149,15 @@ func FlushBinaryLogs(instanceKey *InstanceKey, count int) (*Instance, error) { func FlushBinaryLogsTo(instanceKey *InstanceKey, logFile string) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } distance := instance.SelfBinlogCoordinates.FileNumberDistance(&BinlogCoordinates{LogFile: logFile}) if distance < 0 { - return nil, log.Errorf("FlushBinaryLogsTo: target log file %+v is smaller than current log file %+v", logFile, instance.SelfBinlogCoordinates.LogFile) + errMsg := fmt.Sprintf("FlushBinaryLogsTo: target log file %+v is smaller than current log file %+v", logFile, instance.SelfBinlogCoordinates.LogFile) + log.Errorf(errMsg) + return nil, fmt.Errorf(errMsg) } return FlushBinaryLogs(instanceKey, distance) } @@ -197,40 +170,22 @@ func purgeBinaryLogsTo(instanceKey *InstanceKey, logFile string) (*Instance, err _, err := ExecInstance(instanceKey, "purge binary logs to ?", logFile) if err != nil { - return nil, log.Errore(err) + log.Error(err) + return nil, err } log.Infof("purge-binary-logs to=%+v on %+v", logFile, *instanceKey) - AuditOperation("purge-binary-logs", instanceKey, "success") + _ = AuditOperation("purge-binary-logs", instanceKey, "success") return ReadTopologyInstance(instanceKey) } -// TODO(sougou): implement count -func SetSemiSyncPrimary(instanceKey *InstanceKey, enablePrimary bool) error { - if _, err := ExecInstance(instanceKey, `set global rpl_semi_sync_master_enabled = ?, global rpl_semi_sync_slave_enabled = ?`, enablePrimary, false); err != nil { - return log.Errore(err) - } - return nil -} - -// TODO(sougou): This function may be used later for fixing semi-sync -func SetSemiSyncReplica(instanceKey 
*InstanceKey, enableReplica bool) error { - if _, err := ExecInstance(instanceKey, `set global rpl_semi_sync_master_enabled = ?, global rpl_semi_sync_slave_enabled = ?`, false, enableReplica); err != nil { - return log.Errore(err) - } - // Need to apply change by stopping starting IO thread - ExecInstance(instanceKey, "stop slave io_thread") - if _, err := ExecInstance(instanceKey, "start slave io_thread"); err != nil { - return log.Errore(err) - } - return nil -} - func RestartReplicationQuick(instanceKey *InstanceKey) error { for _, cmd := range []string{`stop slave sql_thread`, `stop slave io_thread`, `start slave io_thread`, `start slave sql_thread`} { if _, err := ExecInstance(instanceKey, cmd); err != nil { - return log.Errorf("%+v: RestartReplicationQuick: '%q' failed: %+v", *instanceKey, cmd, err) + errMsg := fmt.Sprintf("%+v: RestartReplicationQuick: '%q' failed: %+v", *instanceKey, cmd, err) + log.Errorf(errMsg) + return fmt.Errorf(errMsg) } log.Infof("%s on %+v as part of RestartReplicationQuick", cmd, *instanceKey) } @@ -243,7 +198,8 @@ func RestartReplicationQuick(instanceKey *InstanceKey) error { func StopReplicationNicely(instanceKey *InstanceKey, timeout time.Duration) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if !instance.ReplicationThreadsExist() { @@ -253,7 +209,9 @@ func StopReplicationNicely(instanceKey *InstanceKey, timeout time.Duration) (*In // stop io_thread, start sql_thread but catch any errors for _, cmd := range []string{`stop slave io_thread`, `start slave sql_thread`} { if _, err := ExecInstance(instanceKey, cmd); err != nil { - return nil, log.Errorf("%+v: StopReplicationNicely: '%q' failed: %+v", *instanceKey, cmd, err) + errMsg := fmt.Sprintf("%+v: StopReplicationNicely: '%q' failed: %+v", *instanceKey, cmd, err) + log.Errorf(errMsg) + return nil, fmt.Errorf(errMsg) } } @@ -266,7 +224,8 @@ func StopReplicationNicely(instanceKey *InstanceKey, timeout time.Duration) (*In _, err = ExecInstance(instanceKey, `stop slave`) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } instance, err = ReadTopologyInstance(instanceKey) @@ -291,7 +250,8 @@ func WaitForSQLThreadUpToDate(instanceKey *InstanceKey, overallTimeout time.Dura return ReadTopologyInstance(instanceKey) }) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if instance.SQLThreadUpToDate() { @@ -299,7 +259,9 @@ func WaitForSQLThreadUpToDate(instanceKey *InstanceKey, overallTimeout time.Dura return instance, nil } if instance.SQLDelay != 0 { - return instance, log.Errorf("WaitForSQLThreadUpToDate: instance %+v has SQL Delay %+v. Operation is irrelevant", *instanceKey, instance.SQLDelay) + errMsg := fmt.Sprintf("WaitForSQLThreadUpToDate: instance %+v has SQL Delay %+v. 
Operation is irrelevant", *instanceKey, instance.SQLDelay) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } if !instance.ExecBinlogCoordinates.Equals(&lastExecBinlogCoordinates) { @@ -314,11 +276,15 @@ func WaitForSQLThreadUpToDate(instanceKey *InstanceKey, overallTimeout time.Dura select { case <-generalTimer.C: - return instance, log.Errorf("WaitForSQLThreadUpToDate timeout on %+v after duration %+v", *instanceKey, overallTimeout) + errMsg := fmt.Sprintf("WaitForSQLThreadUpToDate timeout on %+v after duration %+v", *instanceKey, overallTimeout) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) case <-staleTimer.C: - return instance, log.Errorf("WaitForSQLThreadUpToDate stale coordinates timeout on %+v after duration %+v", *instanceKey, staleCoordinatesTimeout) + errMsg := fmt.Sprintf("WaitForSQLThreadUpToDate stale coordinates timeout on %+v after duration %+v", *instanceKey, staleCoordinatesTimeout) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) default: - log.Debugf("WaitForSQLThreadUpToDate waiting on %+v", *instanceKey) + log.Infof("WaitForSQLThreadUpToDate waiting on %+v", *instanceKey) time.Sleep(retryInterval) } } @@ -332,7 +298,7 @@ func StopReplicas(replicas [](*Instance), stopReplicationMethod StopReplicationM } refreshedReplicas := [](*Instance){} - log.Debugf("Stopping %d replicas via %s", len(replicas), string(stopReplicationMethod)) + log.Infof("Stopping %d replicas via %s", len(replicas), string(stopReplicationMethod)) // use concurrency but wait for all to complete barrier := make(chan *Instance) for _, replica := range replicas { @@ -357,25 +323,17 @@ func StopReplicas(replicas [](*Instance), stopReplicationMethod StopReplicationM return refreshedReplicas } -// StopReplicasNicely will attemt to stop all given replicas nicely, up to timeout -func StopReplicasNicely(replicas [](*Instance), timeout time.Duration) [](*Instance) { - stoppedReplicas := StopReplicas(replicas, StopReplicationNice, timeout) - // We remove nil instances because StopReplicas might introduce nils in the array that it returns in case of - // failures while reading the tablet from the backend. This could happen when the tablet is forgotten while we are - // trying to stop the replication on the tablets. 
- stoppedReplicas = RemoveNilInstances(stoppedReplicas) - return stoppedReplicas -} - // StopReplication stops replication on a given instance func StopReplication(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } _, err = ExecInstance(instanceKey, `stop slave`) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } instance, err = ReadTopologyInstance(instanceKey) @@ -410,7 +368,8 @@ func waitForReplicationState(instanceKey *InstanceKey, expectedState Replication func StartReplication(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if !instance.IsReplica() { @@ -419,7 +378,8 @@ func StartReplication(instanceKey *InstanceKey) (*Instance, error) { _, err = ExecInstance(instanceKey, `start slave`) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } log.Infof("Started replication on %+v", instanceKey) @@ -427,7 +387,8 @@ func StartReplication(instanceKey *InstanceKey) (*Instance, error) { instance, err = ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if !instance.ReplicaRunning() { return instance, ErrReplicationNotRunning @@ -439,29 +400,12 @@ func StartReplication(instanceKey *InstanceKey) (*Instance, error) { func RestartReplication(instanceKey *InstanceKey) (instance *Instance, err error) { instance, err = StopReplication(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } instance, err = StartReplication(instanceKey) - return instance, log.Errore(err) -} - -// StartReplicas will do concurrent start-replica -func StartReplicas(replicas [](*Instance)) { - // use concurrency but wait for all to complete - log.Debugf("Starting %d replicas", len(replicas)) - barrier := make(chan InstanceKey) - for _, instance := range replicas { - instance := instance - go func() { - // Signal compelted replica - defer func() { barrier <- instance.Key }() - // Wait your turn to read a replica - ExecuteOnTopology(func() { StartReplication(&instance.Key) }) - }() - } - for range replicas { - <-barrier - } + log.Error(err) + return instance, err } func WaitForExecBinlogCoordinatesToReach(instanceKey *InstanceKey, coordinates *BinlogCoordinates, maxWait time.Duration) (instance *Instance, exactMatch bool, err error) { @@ -472,7 +416,8 @@ func WaitForExecBinlogCoordinatesToReach(instanceKey *InstanceKey, coordinates * } instance, err = ReadTopologyInstance(instanceKey) if err != nil { - return instance, exactMatch, log.Errore(err) + log.Error(err) + return instance, exactMatch, err } switch { @@ -490,7 +435,8 @@ func WaitForExecBinlogCoordinatesToReach(instanceKey *InstanceKey, coordinates * func StartReplicationUntilPrimaryCoordinates(instanceKey *InstanceKey, primaryCoordinates *BinlogCoordinates) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if !instance.IsReplica() { @@ -508,12 +454,14 @@ func StartReplicationUntilPrimaryCoordinates(instanceKey *InstanceKey, primaryCo _, err = ExecInstance(instanceKey, "start slave until master_log_file=?, master_log_pos=?", primaryCoordinates.LogFile, 
primaryCoordinates.LogPos) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } instance, exactMatch, err := WaitForExecBinlogCoordinatesToReach(instanceKey, primaryCoordinates, 0) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if !exactMatch { return instance, fmt.Errorf("Start SLAVE UNTIL is past coordinates: %+v", instanceKey) @@ -521,7 +469,8 @@ func StartReplicationUntilPrimaryCoordinates(instanceKey *InstanceKey, primaryCo instance, err = StopReplication(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } return instance, err @@ -531,13 +480,14 @@ func StartReplicationUntilPrimaryCoordinates(instanceKey *InstanceKey, primaryCo func EnablePrimarySSL(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if instance.ReplicationThreadsExist() && !instance.ReplicationThreadsStopped() { return instance, fmt.Errorf("EnablePrimarySSL: Cannot enable SSL replication on %+v because replication threads are not stopped", *instanceKey) } - log.Debugf("EnablePrimarySSL: Will attempt enabling SSL replication on %+v", *instanceKey) + log.Infof("EnablePrimarySSL: Will attempt enabling SSL replication on %+v", *instanceKey) if *config.RuntimeCLIFlags.Noop { return instance, fmt.Errorf("noop: aborting CHANGE MASTER TO MASTER_SSL=1 operation on %+v; signaling error but nothing went wrong", *instanceKey) @@ -545,7 +495,8 @@ func EnablePrimarySSL(instanceKey *InstanceKey) (*Instance, error) { _, err = ExecInstance(instanceKey, "change master to master_ssl=1") if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } log.Infof("EnablePrimarySSL: Enabled SSL replication on %+v", *instanceKey) @@ -556,7 +507,7 @@ func EnablePrimarySSL(instanceKey *InstanceKey) (*Instance, error) { // See https://bugs.mysql.com/bug.php?id=83713 func workaroundBug83713(instanceKey *InstanceKey) { - log.Debugf("workaroundBug83713: %+v", *instanceKey) + log.Infof("workaroundBug83713: %+v", *instanceKey) queries := []string{ `reset slave`, `start slave IO_THREAD`, @@ -565,7 +516,7 @@ func workaroundBug83713(instanceKey *InstanceKey) { } for _, query := range queries { if _, err := ExecInstance(instanceKey, query); err != nil { - log.Debugf("workaroundBug83713: error on %s: %+v", query, err) + log.Infof("workaroundBug83713: error on %s: %+v", query, err) } } } @@ -576,22 +527,23 @@ func ChangePrimaryTo(instanceKey *InstanceKey, primaryKey *InstanceKey, primaryB user, password := config.Config.MySQLReplicaUser, config.Config.MySQLReplicaPassword instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if instance.ReplicationThreadsExist() && !instance.ReplicationThreadsStopped() { return instance, fmt.Errorf("ChangePrimaryTo: Cannot change primary on: %+v because replication threads are not stopped", *instanceKey) } - log.Debugf("ChangePrimaryTo: will attempt changing primary on %+v to %+v, %+v", *instanceKey, *primaryKey, *primaryBinlogCoordinates) + log.Infof("ChangePrimaryTo: will attempt changing primary on %+v to %+v, %+v", *instanceKey, *primaryKey, *primaryBinlogCoordinates) changeToPrimaryKey := primaryKey if !skipUnresolve { unresolvedPrimaryKey, nameUnresolved, err := UnresolveHostname(primaryKey) if err != nil { - 
log.Debugf("ChangePrimaryTo: aborting operation on %+v due to resolving error on %+v: %+v", *instanceKey, *primaryKey, err) + log.Infof("ChangePrimaryTo: aborting operation on %+v due to resolving error on %+v: %+v", *instanceKey, *primaryKey, err) return instance, err } if nameUnresolved { - log.Debugf("ChangePrimaryTo: Unresolved %+v into %+v", *primaryKey, unresolvedPrimaryKey) + log.Infof("ChangePrimaryTo: Unresolved %+v into %+v", *primaryKey, unresolvedPrimaryKey) } changeToPrimaryKey = &unresolvedPrimaryKey } @@ -667,24 +619,27 @@ func ChangePrimaryTo(instanceKey *InstanceKey, primaryKey *InstanceKey, primaryB } err = changePrimaryFunc() if err != nil && instance.UsingOracleGTID && strings.Contains(err.Error(), Error1201CouldnotInitializePrimaryInfoStructure) { - log.Debugf("ChangePrimaryTo: got %+v", err) + log.Infof("ChangePrimaryTo: got %+v", err) workaroundBug83713(instanceKey) err = changePrimaryFunc() } if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } durability, err := GetDurabilityPolicy(*primaryKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } semiSync := IsReplicaSemiSync(durability, *primaryKey, *instanceKey) if _, err := ExecInstance(instanceKey, `set global rpl_semi_sync_master_enabled = ?, global rpl_semi_sync_slave_enabled = ?`, false, semiSync); err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } - ResetInstanceRelaylogCoordinatesHistory(instanceKey) + _ = ResetInstanceRelaylogCoordinatesHistory(instanceKey) log.Infof("ChangePrimaryTo: Changed primary on %+v to: %+v, %+v. GTID: %+v", *instanceKey, primaryKey, primaryBinlogCoordinates, changedViaGTID) @@ -692,35 +647,12 @@ func ChangePrimaryTo(instanceKey *InstanceKey, primaryKey *InstanceKey, primaryB return instance, err } -// SkipToNextBinaryLog changes primary position to beginning of next binlog -// USE WITH CARE! -// Use case is binlog servers where the primary was gone & replaced by another. 
-func SkipToNextBinaryLog(instanceKey *InstanceKey) (*Instance, error) { - instance, err := ReadTopologyInstance(instanceKey) - if err != nil { - return instance, log.Errore(err) - } - - nextFileCoordinates, err := instance.ExecBinlogCoordinates.NextFileCoordinates() - if err != nil { - return instance, log.Errore(err) - } - nextFileCoordinates.LogPos = 4 - log.Debugf("Will skip replication on %+v to next binary log: %+v", instance.Key, nextFileCoordinates.LogFile) - - instance, err = ChangePrimaryTo(&instance.Key, &instance.SourceKey, &nextFileCoordinates, false, GTIDHintNeutral) - if err != nil { - return instance, log.Errore(err) - } - AuditOperation("skip-binlog", instanceKey, fmt.Sprintf("Skipped replication to next binary log: %+v", nextFileCoordinates.LogFile)) - return StartReplication(instanceKey) -} - // ResetReplication resets a replica, breaking the replication func ResetReplication(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if instance.ReplicationThreadsExist() && !instance.ReplicationThreadsStopped() { @@ -737,16 +669,18 @@ func ResetReplication(instanceKey *InstanceKey) (*Instance, error) { // RESET SLAVE ALL command solves this, but only as of 5.6.3 _, err = ExecInstance(instanceKey, `change master to master_host='_'`) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } _, err = ExecInstance(instanceKey, `reset slave /*!50603 all */`) if err != nil && strings.Contains(err.Error(), Error1201CouldnotInitializePrimaryInfoStructure) { - log.Debugf("ResetReplication: got %+v", err) + log.Infof("ResetReplication: got %+v", err) workaroundBug83713(instanceKey) _, err = ExecInstance(instanceKey, `reset slave /*!50603 all */`) } if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } log.Infof("Reset replication %+v", instanceKey) @@ -758,7 +692,8 @@ func ResetReplication(instanceKey *InstanceKey) (*Instance, error) { func ResetPrimary(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if instance.ReplicationThreadsExist() && !instance.ReplicationThreadsStopped() { @@ -771,7 +706,8 @@ func ResetPrimary(instanceKey *InstanceKey) (*Instance, error) { _, err = ExecInstance(instanceKey, `reset master`) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } log.Infof("Reset primary %+v", instanceKey) @@ -850,7 +786,8 @@ func skipQueryOracleGtid(instance *Instance) error { func SkipQuery(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if !instance.IsReplica() { @@ -867,18 +804,21 @@ func SkipQuery(instanceKey *InstanceKey) (*Instance, error) { return instance, fmt.Errorf("noop: aborting skip-query operation on %+v; signalling error but nothing went wrong", *instanceKey) } - log.Debugf("Skipping one query on %+v", instanceKey) + log.Infof("Skipping one query on %+v", instanceKey) if instance.UsingOracleGTID { err = skipQueryOracleGtid(instance) } else if instance.UsingMariaDBGTID { - return instance, log.Errorf("%+v is replicating with MariaDB GTID. 
To skip a query first disable GTID, then skip, then enable GTID again", *instanceKey) + errMsg := fmt.Sprintf("%+v is replicating with MariaDB GTID. To skip a query first disable GTID, then skip, then enable GTID again", *instanceKey) + log.Errorf(errMsg) + return instance, fmt.Errorf(errMsg) } else { err = skipQueryClassic(instance) } if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } - AuditOperation("skip-query", instanceKey, "Skipped one query") + _ = AuditOperation("skip-query", instanceKey, "Skipped one query") return StartReplication(instanceKey) } @@ -886,12 +826,14 @@ func SkipQuery(instanceKey *InstanceKey) (*Instance, error) { func PrimaryPosWait(instanceKey *InstanceKey, binlogCoordinates *BinlogCoordinates) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } _, err = ExecInstance(instanceKey, `select master_pos_wait(?, ?)`, binlogCoordinates.LogFile, binlogCoordinates.LogPos) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } log.Infof("Instance %+v has reached coordinates: %+v", instanceKey, binlogCoordinates) @@ -903,7 +845,8 @@ func PrimaryPosWait(instanceKey *InstanceKey, binlogCoordinates *BinlogCoordinat func SetReadOnly(instanceKey *InstanceKey, readOnly bool) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if *config.RuntimeCLIFlags.Noop { @@ -911,7 +854,8 @@ func SetReadOnly(instanceKey *InstanceKey, readOnly bool) (*Instance, error) { } if _, err := ExecInstance(instanceKey, "set global read_only = ?", readOnly); err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if config.Config.UseSuperReadOnly { if _, err := ExecInstance(instanceKey, "set global super_read_only = ?", readOnly); err != nil { @@ -919,13 +863,13 @@ func SetReadOnly(instanceKey *InstanceKey, readOnly bool) (*Instance, error) { // MySQL 5.7.8 and Percona Server 5.6.21-70 // At this time orchestrator does not verify whether a server supports super_read_only or not. // It makes a best effort to set it. 
- log.Errore(err) + log.Error(err) } } instance, err = ReadTopologyInstance(instanceKey) log.Infof("instance %+v read_only: %t", instanceKey, readOnly) - AuditOperation("read-only", instanceKey, fmt.Sprintf("set as %t", readOnly)) + _ = AuditOperation("read-only", instanceKey, fmt.Sprintf("set as %t", readOnly)) return instance, err } @@ -934,7 +878,8 @@ func SetReadOnly(instanceKey *InstanceKey, readOnly bool) (*Instance, error) { func KillQuery(instanceKey *InstanceKey, process int64) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } if *config.RuntimeCLIFlags.Noop { @@ -943,16 +888,18 @@ func KillQuery(instanceKey *InstanceKey, process int64) (*Instance, error) { _, err = ExecInstance(instanceKey, `kill query ?`, process) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } instance, err = ReadTopologyInstance(instanceKey) if err != nil { - return instance, log.Errore(err) + log.Error(err) + return instance, err } log.Infof("Killed query on %+v", *instanceKey) - AuditOperation("kill-query", instanceKey, fmt.Sprintf("Killed query %d", process)) + _ = AuditOperation("kill-query", instanceKey, fmt.Sprintf("Killed query %d", process)) return instance, err } diff --git a/go/vt/orchestrator/inst/instance_topology_test.go b/go/vt/orchestrator/inst/instance_topology_test.go index 15cd04a158d..6f07e51123d 100644 --- a/go/vt/orchestrator/inst/instance_topology_test.go +++ b/go/vt/orchestrator/inst/instance_topology_test.go @@ -6,7 +6,6 @@ import ( "testing" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests" "vitess.io/vitess/go/vt/vtctl/reparentutil/promotionrule" ) @@ -23,7 +22,6 @@ var ( func init() { config.Config.HostnameResolveMethod = "none" config.MarkConfigurationLoaded() - log.SetLevel(log.ERROR) } func generateTestInstances() (instances [](*Instance), instancesMap map[string](*Instance)) { diff --git a/go/vt/orchestrator/inst/maintenance_dao.go b/go/vt/orchestrator/inst/maintenance_dao.go index 424f8dc53a5..0300d9a71bd 100644 --- a/go/vt/orchestrator/inst/maintenance_dao.go +++ b/go/vt/orchestrator/inst/maintenance_dao.go @@ -19,9 +19,10 @@ package inst import ( "fmt" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" "vitess.io/vitess/go/vt/orchestrator/process" "vitess.io/vitess/go/vt/orchestrator/util" @@ -63,7 +64,7 @@ func ReadActiveMaintenance() ([]Maintenance, error) { }) if err != nil { - log.Errore(err) + log.Error(err) } return res, err @@ -95,7 +96,8 @@ func BeginBoundedMaintenance(instanceKey *InstanceKey, owner string, reason stri explicitlyBounded, ) if err != nil { - return maintenanceToken, log.Errore(err) + log.Error(err) + return maintenanceToken, err } if affected, _ := res.RowsAffected(); affected == 0 { @@ -103,7 +105,7 @@ func BeginBoundedMaintenance(instanceKey *InstanceKey, owner string, reason stri } else { // success maintenanceToken, _ = res.LastInsertId() - AuditOperation("begin-maintenance", instanceKey, fmt.Sprintf("maintenanceToken: %d, owner: %s, reason: %s", maintenanceToken, owner, reason)) + _ = AuditOperation("begin-maintenance", instanceKey, fmt.Sprintf("maintenanceToken: %d, owner: %s, reason: 
%s", maintenanceToken, owner, reason)) } return maintenanceToken, err } @@ -130,13 +132,14 @@ func EndMaintenanceByInstanceKey(instanceKey *InstanceKey) (wasMaintenance bool, instanceKey.Port, ) if err != nil { - return wasMaintenance, log.Errore(err) + log.Error(err) + return wasMaintenance, err } if affected, _ := res.RowsAffected(); affected > 0 { // success wasMaintenance = true - AuditOperation("end-maintenance", instanceKey, "") + _ = AuditOperation("end-maintenance", instanceKey, "") } return wasMaintenance, err } @@ -160,7 +163,8 @@ func InMaintenance(instanceKey *InstanceKey) (inMaintenance bool, err error) { return nil }) - return inMaintenance, log.Errore(err) + log.Error(err) + return inMaintenance, err } // ReadMaintenanceInstanceKey will return the instanceKey for active maintenance by maintenanceToken @@ -185,7 +189,8 @@ func ReadMaintenanceInstanceKey(maintenanceToken int64) (*InstanceKey, error) { return nil }) - return res, log.Errore(err) + log.Error(err) + return res, err } // EndMaintenance will terminate an active maintenance via maintenanceToken @@ -202,13 +207,14 @@ func EndMaintenance(maintenanceToken int64) (wasMaintenance bool, err error) { maintenanceToken, ) if err != nil { - return wasMaintenance, log.Errore(err) + log.Error(err) + return wasMaintenance, err } if affected, _ := res.RowsAffected(); affected > 0 { // success wasMaintenance = true instanceKey, _ := ReadMaintenanceInstanceKey(maintenanceToken) - AuditOperation("end-maintenance", instanceKey, fmt.Sprintf("maintenanceToken: %d", maintenanceToken)) + _ = AuditOperation("end-maintenance", instanceKey, fmt.Sprintf("maintenanceToken: %d", maintenanceToken)) } return wasMaintenance, err } @@ -226,10 +232,11 @@ func ExpireMaintenance() error { config.MaintenancePurgeDays, ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } if rowsAffected, _ := res.RowsAffected(); rowsAffected > 0 { - AuditOperation("expire-maintenance", nil, fmt.Sprintf("Purged historical entries: %d", rowsAffected)) + _ = AuditOperation("expire-maintenance", nil, fmt.Sprintf("Purged historical entries: %d", rowsAffected)) } } { @@ -242,10 +249,11 @@ func ExpireMaintenance() error { `, ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } if rowsAffected, _ := res.RowsAffected(); rowsAffected > 0 { - AuditOperation("expire-maintenance", nil, fmt.Sprintf("Expired bounded: %d", rowsAffected)) + _ = AuditOperation("expire-maintenance", nil, fmt.Sprintf("Expired bounded: %d", rowsAffected)) } } { @@ -260,10 +268,11 @@ func ExpireMaintenance() error { `, ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } if rowsAffected, _ := res.RowsAffected(); rowsAffected > 0 { - AuditOperation("expire-maintenance", nil, fmt.Sprintf("Expired dead: %d", rowsAffected)) + _ = AuditOperation("expire-maintenance", nil, fmt.Sprintf("Expired dead: %d", rowsAffected)) } } diff --git a/go/vt/orchestrator/inst/pool.go b/go/vt/orchestrator/inst/pool.go index 594283377e8..834b8c042a5 100644 --- a/go/vt/orchestrator/inst/pool.go +++ b/go/vt/orchestrator/inst/pool.go @@ -20,9 +20,9 @@ import ( "strings" "time" - "vitess.io/vitess/go/vt/orchestrator/config" + "vitess.io/vitess/go/vt/log" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "vitess.io/vitess/go/vt/orchestrator/config" ) // PoolInstancesMap lists instance keys per pool name @@ -67,13 +67,14 @@ func ApplyPoolInstances(submission *PoolInstancesSubmission) error { instanceKey = ReadFuzzyInstanceKeyIfPossible(instanceKey) } if err != 
nil { - return log.Errore(err) + log.Error(err) + return err } instanceKeys = append(instanceKeys, instanceKey) } } - log.Debugf("submitting %d instances in %+v pool", len(instanceKeys), submission.Pool) - writePoolInstances(submission.Pool, instanceKeys) + log.Infof("submitting %d instances in %+v pool", len(instanceKeys), submission.Pool) + _ = writePoolInstances(submission.Pool, instanceKeys) return nil } diff --git a/go/vt/orchestrator/inst/pool_dao.go b/go/vt/orchestrator/inst/pool_dao.go index 3d87d52d5c5..8bfbcb96967 100644 --- a/go/vt/orchestrator/inst/pool_dao.go +++ b/go/vt/orchestrator/inst/pool_dao.go @@ -19,9 +19,10 @@ package inst import ( "fmt" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) @@ -30,18 +31,21 @@ func writePoolInstances(pool string, instanceKeys [](*InstanceKey)) error { writeFunc := func() error { dbh, err := db.OpenOrchestrator() if err != nil { - return log.Errore(err) + log.Error(err) + return err } tx, _ := dbh.Begin() if _, err := tx.Exec(`delete from database_instance_pool where pool = ?`, pool); err != nil { tx.Rollback() - return log.Errore(err) + log.Error(err) + return err } query := `insert into database_instance_pool (hostname, port, pool, registered_at) values (?, ?, ?, now())` for _, instanceKey := range instanceKeys { if _, err := tx.Exec(query, instanceKey.Hostname, instanceKey.Port, pool); err != nil { tx.Rollback() - return log.Errore(err) + log.Error(err) + return err } } tx.Commit() @@ -117,31 +121,6 @@ func ReadClusterPoolInstancesMap(clusterName string, pool string) (*PoolInstance return &poolInstancesMap, nil } -func ReadAllPoolInstancesSubmissions() ([]PoolInstancesSubmission, error) { - result := []PoolInstancesSubmission{} - query := ` - select - pool, - min(registered_at) as registered_at, - GROUP_CONCAT(concat(hostname, ':', port)) as hosts - from - database_instance_pool - group by - pool - ` - err := db.QueryOrchestrator(query, sqlutils.Args(), func(m sqlutils.RowMap) error { - submission := PoolInstancesSubmission{} - submission.Pool = m.GetString("pool") - submission.CreatedAt = m.GetTime("registered_at") - submission.RegisteredAt = m.GetString("registered_at") - submission.DelimitedInstances = m.GetString("hosts") - result = append(result, submission) - return nil - }) - - return result, log.Errore(err) -} - // ExpirePoolInstances cleans up the database_instance_pool table from expired items func ExpirePoolInstances() error { _, err := db.ExecOrchestrator(` @@ -152,5 +131,6 @@ func ExpirePoolInstances() error { `, config.Config.InstancePoolExpiryMinutes, ) - return log.Errore(err) + log.Error(err) + return err } diff --git a/go/vt/orchestrator/inst/postponed_functions.go b/go/vt/orchestrator/inst/postponed_functions.go index e713f12bfd5..f250009c8f5 100644 --- a/go/vt/orchestrator/inst/postponed_functions.go +++ b/go/vt/orchestrator/inst/postponed_functions.go @@ -19,7 +19,7 @@ package inst import ( "sync" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "vitess.io/vitess/go/vt/log" ) type PostponedFunctionsContainer struct { @@ -49,9 +49,9 @@ func (postponedFuncsContainer *PostponedFunctionsContainer) AddPostponedFunction } func (postponedFuncsContainer *PostponedFunctionsContainer) Wait() { - log.Debugf("PostponedFunctionsContainer: waiting on %+v postponed functions", postponedFuncsContainer.Len()) + 
log.Infof("PostponedFunctionsContainer: waiting on %+v postponed functions", postponedFuncsContainer.Len()) postponedFuncsContainer.waitGroup.Wait() - log.Debugf("PostponedFunctionsContainer: done waiting") + log.Infof("PostponedFunctionsContainer: done waiting") } func (postponedFuncsContainer *PostponedFunctionsContainer) Len() int { diff --git a/go/vt/orchestrator/inst/resolve.go b/go/vt/orchestrator/inst/resolve.go index 92f08f1822d..899e082c1b3 100644 --- a/go/vt/orchestrator/inst/resolve.go +++ b/go/vt/orchestrator/inst/resolve.go @@ -27,8 +27,8 @@ import ( "github.com/patrickmn/go-cache" + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" ) type HostnameResolve struct { @@ -155,7 +155,7 @@ func ResolveHostname(hostname string) (string, error) { } // Unfound: resolve! - log.Debugf("Hostname unresolved yet: %s", hostname) + log.Infof("Hostname unresolved yet: %s", hostname) resolvedHostname, err := resolveHostname(hostname) if config.Config.RejectHostnameResolvePattern != "" { // Reject, don't even cache @@ -172,7 +172,7 @@ func ResolveHostname(hostname string) (string, error) { return hostname, err } // Good result! Cache it, also to DB - log.Debugf("Cache hostname resolve %s as %s", hostname, resolvedHostname) + log.Infof("Cache hostname resolve %s as %s", hostname, resolvedHostname) go UpdateResolvedHostname(hostname, resolvedHostname) return resolvedHostname, nil } @@ -189,7 +189,7 @@ func UpdateResolvedHostname(hostname string, resolvedHostname string) bool { } getHostnameResolvesLightweightCache().Set(hostname, resolvedHostname, 0) if !HostnameResolveMethodIsNone() { - WriteResolvedHostname(hostname, resolvedHostname) + _ = WriteResolvedHostname(hostname, resolvedHostname) } return true } @@ -221,7 +221,7 @@ func FlushNontrivialResolveCacheToDatabase() error { for hostname := range items { resolvedHostname, found := getHostnameResolvesLightweightCache().Get(hostname) if found && (resolvedHostname.(string) != hostname) { - WriteResolvedHostname(hostname, resolvedHostname.(string)) + _ = WriteResolvedHostname(hostname, resolvedHostname.(string)) } } return nil @@ -244,7 +244,8 @@ func UnresolveHostname(instanceKey *InstanceKey) (InstanceKey, bool, error) { } unresolvedHostname, err := readUnresolvedHostname(instanceKey.Hostname) if err != nil { - return *instanceKey, false, log.Errore(err) + log.Error(err) + return *instanceKey, false, err } if unresolvedHostname == instanceKey.Hostname { // unchanged. Nothing to do @@ -255,7 +256,8 @@ func UnresolveHostname(instanceKey *InstanceKey) (InstanceKey, bool, error) { instance, err := ReadTopologyInstance(unresolvedKey) if err != nil { - return *instanceKey, false, log.Errore(err) + log.Error(err) + return *instanceKey, false, err } if instance.IsBinlogServer() && config.Config.SkipBinlogServerUnresolveCheck { // Do nothing. Everything is assumed to be fine. @@ -264,7 +266,9 @@ func UnresolveHostname(instanceKey *InstanceKey) (InstanceKey, bool, error) { if *config.RuntimeCLIFlags.SkipUnresolveCheck { return *instanceKey, false, nil } - return *instanceKey, false, log.Errorf("Error unresolving; hostname=%s, unresolved=%s, re-resolved=%s; mismatch. Skip/ignore with --skip-unresolve-check", instanceKey.Hostname, unresolvedKey.Hostname, instance.Key.Hostname) + errMsg := fmt.Sprintf("Error unresolving; hostname=%s, unresolved=%s, re-resolved=%s; mismatch. 
Skip/ignore with --skip-unresolve-check", instanceKey.Hostname, unresolvedKey.Hostname, instance.Key.Hostname) + log.Errorf(errMsg) + return *instanceKey, false, fmt.Errorf(errMsg) } return *unresolvedKey, true, nil } @@ -297,7 +301,8 @@ func getHostnameIPs(hostname string) (ips []net.IP, fromCache bool, err error) { } ips, err = net.LookupIP(hostname) if err != nil { - return ips, false, log.Errore(err) + log.Error(err) + return ips, false, err } hostnameIPsCache.Set(hostname, ips, cache.DefaultExpiration) return ips, false, nil diff --git a/go/vt/orchestrator/inst/resolve_dao.go b/go/vt/orchestrator/inst/resolve_dao.go index 998e1571c58..85192e17e05 100644 --- a/go/vt/orchestrator/inst/resolve_dao.go +++ b/go/vt/orchestrator/inst/resolve_dao.go @@ -19,9 +19,10 @@ package inst import ( "github.com/rcrowley/go-metrics" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) @@ -32,11 +33,11 @@ var readUnresolvedHostnameCounter = metrics.NewCounter() var readAllResolvedHostnamesCounter = metrics.NewCounter() func init() { - metrics.Register("resolve.write_resolved", writeResolvedHostnameCounter) - metrics.Register("resolve.write_unresolved", writeUnresolvedHostnameCounter) - metrics.Register("resolve.read_resolved", readResolvedHostnameCounter) - metrics.Register("resolve.read_unresolved", readUnresolvedHostnameCounter) - metrics.Register("resolve.read_resolved_all", readAllResolvedHostnamesCounter) + _ = metrics.Register("resolve.write_resolved", writeResolvedHostnameCounter) + _ = metrics.Register("resolve.write_unresolved", writeUnresolvedHostnameCounter) + _ = metrics.Register("resolve.read_resolved", readResolvedHostnameCounter) + _ = metrics.Register("resolve.read_unresolved", readUnresolvedHostnameCounter) + _ = metrics.Register("resolve.read_resolved_all", readAllResolvedHostnamesCounter) } // WriteResolvedHostname stores a hostname and the resolved hostname to backend database @@ -54,7 +55,8 @@ func WriteResolvedHostname(hostname string, resolvedHostname string) error { hostname, resolvedHostname) if err != nil { - return log.Errore(err) + log.Error(err) + return err } if hostname != resolvedHostname { // history is only interesting when there's actually something to resolve... 
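The hunks above and below repeat the same mechanical rewrite: the golib form `return instance, log.Errore(err)`, which logged and returned the error in a single expression, becomes an explicit `log.Error(err)` followed by `return instance, err` against `vitess.io/vitess/go/vt/log`. A minimal sketch of how the one-line call-site shape could be kept under the new logger follows; the `logError` helper is hypothetical and not part of this patch, and it assumes only that `vt/log.Error` accepts arbitrary arguments, as the call sites above already demonstrate.

package example

import "vitess.io/vitess/go/vt/log"

// logError logs err only when it is non-nil and hands it back to the caller,
// mirroring the pass-through style of the call sites being rewritten here.
// Hypothetical helper, for illustration only; not part of the patch.
func logError(err error) error {
	if err != nil {
		log.Error(err)
	}
	return err
}

// usage sketch: return instance, logError(err)

At the few call sites above that are not already wrapped in an `if err != nil` check (for example InMaintenance and ReadAllHostnameUnresolves), the two-line form adopted by the patch logs unconditionally, so a nil error also produces a log line; the nil guard in the sketch is one way to keep the old behavior if that matters.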
@@ -96,7 +98,7 @@ func ReadResolvedHostname(hostname string) (string, error) { readResolvedHostnameCounter.Inc(1) if err != nil { - log.Errore(err) + log.Error(err) } return resolvedHostname, err } @@ -119,7 +121,7 @@ func ReadAllHostnameResolves() ([]HostnameResolve, error) { readAllResolvedHostnamesCounter.Inc(1) if err != nil { - log.Errore(err) + log.Error(err) } return res, err } @@ -141,20 +143,8 @@ func ReadAllHostnameUnresolves() ([]HostnameUnresolve, error) { return nil }) - return unres, log.Errore(err) -} - -// ReadAllHostnameUnresolves returns the content of the hostname_unresolve table -func ReadAllHostnameUnresolvesRegistrations() (registrations []HostnameRegistration, err error) { - unresolves, err := ReadAllHostnameUnresolves() - if err != nil { - return registrations, err - } - for _, unresolve := range unresolves { - registration := NewHostnameRegistration(&InstanceKey{Hostname: unresolve.hostname}, unresolve.unresolvedHostname) - registrations = append(registrations, *registration) - } - return registrations, nil + log.Error(err) + return unres, err } // readUnresolvedHostname reverse-reads hostname resolve. It returns a hostname which matches given pattern and resovles to resolvedHostname, @@ -178,7 +168,7 @@ func readUnresolvedHostname(hostname string) (string, error) { readUnresolvedHostnameCounter.Inc(1) if err != nil { - log.Errore(err) + log.Error(err) } return unresolvedHostname, err } @@ -198,7 +188,8 @@ func WriteHostnameUnresolve(instanceKey *InstanceKey, unresolvedHostname string) `, instanceKey.Hostname, unresolvedHostname, ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } _, _ = db.ExecOrchestrator(` replace into hostname_unresolve_history ( @@ -222,7 +213,8 @@ func DeleteHostnameUnresolve(instanceKey *InstanceKey) error { where hostname=? `, instanceKey.Hostname, ) - return log.Errore(err) + log.Error(err) + return err } return ExecDBWriteFunc(writeFunc) } @@ -235,7 +227,8 @@ func ExpireHostnameUnresolve() error { where last_registered < NOW() - INTERVAL ? MINUTE `, config.Config.ExpiryHostnameResolvesMinutes, ) - return log.Errore(err) + log.Error(err) + return err } return ExecDBWriteFunc(writeFunc) } @@ -284,7 +277,7 @@ func DeleteInvalidHostnameResolves() error { hostname = ?`, invalidHostname, ) - log.Errore(err) + log.Error(err) } return err } @@ -315,7 +308,8 @@ func writeHostnameIPs(hostname string, ipv4String string, ipv6String string) err ipv4String, ipv6String, ) - return log.Errore(err) + log.Error(err) + return err } return ExecDBWriteFunc(writeFunc) } diff --git a/go/vt/orchestrator/inst/tablet_dao.go b/go/vt/orchestrator/inst/tablet_dao.go index 979e98983a5..d89a2b3d24c 100644 --- a/go/vt/orchestrator/inst/tablet_dao.go +++ b/go/vt/orchestrator/inst/tablet_dao.go @@ -20,13 +20,14 @@ import ( "context" "errors" + "vitess.io/vitess/go/vt/log" + "google.golang.org/protobuf/encoding/prototext" "google.golang.org/protobuf/proto" "vitess.io/vitess/go/vt/logutil" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata" topodatapb "vitess.io/vitess/go/vt/proto/topodata" @@ -82,12 +83,12 @@ func SwitchPrimary(newPrimaryKey, oldPrimaryKey InstanceKey) error { }) // Don't proceed if shard record could not be updated. 
if err != nil { - log.Errore(err) + log.Error(err) return nil } if _, err := ChangeTabletType(oldPrimaryKey, topodatapb.TabletType_REPLICA, IsReplicaSemiSync(durability, newPrimaryKey, oldPrimaryKey)); err != nil { // This is best effort. - log.Errore(err) + log.Error(err) } return nil } @@ -111,10 +112,11 @@ func ChangeTabletType(instanceKey InstanceKey, tabletType topodatapb.TabletType, defer tsCancel() ti, err := TopoServ.GetTablet(tsCtx, tablet.Alias) if err != nil { - return nil, log.Errore(err) + log.Error(err) + return nil, err } if err := SaveTablet(ti.Tablet); err != nil { - log.Errore(err) + log.Error(err) } return ti.Tablet, nil } diff --git a/go/vt/orchestrator/inst/tag_dao.go b/go/vt/orchestrator/inst/tag_dao.go index cf301788f16..91778865d38 100644 --- a/go/vt/orchestrator/inst/tag_dao.go +++ b/go/vt/orchestrator/inst/tag_dao.go @@ -19,8 +19,9 @@ package inst import ( "fmt" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) @@ -46,13 +47,19 @@ func PutInstanceTag(instanceKey *InstanceKey, tag *Tag) (err error) { func Untag(instanceKey *InstanceKey, tag *Tag) (tagged *InstanceKeyMap, err error) { if tag == nil { - return nil, log.Errorf("Untag: tag is nil") + errMsg := "untag: tag is nil" + log.Errorf(errMsg) + return nil, fmt.Errorf(errMsg) } if tag.Negate { - return nil, log.Errorf("Untag: does not support negation") + errMsg := "untag: does not support negation" + log.Errorf(errMsg) + return nil, fmt.Errorf(errMsg) } if instanceKey == nil && !tag.HasValue { - return nil, log.Errorf("Untag: either indicate an instance or a tag value. Will not delete on-valued tag across instances") + errMsg := "untag: either indicate an instance or a tag value. 
Will not delete on-valued tag across instances" + log.Errorf(errMsg) + return nil, fmt.Errorf(errMsg) } var clause string args := sqlutils.Args() @@ -93,9 +100,10 @@ func Untag(instanceKey *InstanceKey, tag *Tag) (tagged *InstanceKeyMap, err erro `, clause, ) if _, err = db.ExecOrchestrator(query, args...); err != nil { - return tagged, log.Errore(err) + log.Error(err) + return tagged, err } - AuditOperation("delete-instance-tag", instanceKey, tag.String()) + _ = AuditOperation("delete-instance-tag", instanceKey, tag.String()) return tagged, nil } @@ -117,11 +125,8 @@ func ReadInstanceTag(instanceKey *InstanceKey, tag *Tag) (tagExists bool, err er return nil }) - return tagExists, log.Errore(err) -} - -func InstanceTagExists(instanceKey *InstanceKey, tag *Tag) (tagExists bool, err error) { - return ReadInstanceTag(instanceKey, &Tag{TagName: tag.TagName}) + log.Error(err) + return tagExists, err } func ReadInstanceTags(instanceKey *InstanceKey) (tags [](*Tag), err error) { @@ -146,12 +151,15 @@ func ReadInstanceTags(instanceKey *InstanceKey) (tags [](*Tag), err error) { return nil }) - return tags, log.Errore(err) + log.Error(err) + return tags, err } func GetInstanceKeysByTag(tag *Tag) (tagged *InstanceKeyMap, err error) { if tag == nil { - return nil, log.Errorf("GetInstanceKeysByTag: tag is nil") + errMsg := "GetInstanceKeysByTag: tag is nil" + log.Errorf(errMsg) + return nil, fmt.Errorf(errMsg) } clause := `` args := sqlutils.Args() @@ -188,5 +196,6 @@ func GetInstanceKeysByTag(tag *Tag) (tagged *InstanceKeyMap, err error) { tagged.AddKey(*key) return nil }) - return tagged, log.Errore(err) + log.Error(err) + return tagged, err } diff --git a/go/vt/orchestrator/logic/command_applier.go b/go/vt/orchestrator/logic/command_applier.go index 2271be174ad..650a20a230b 100644 --- a/go/vt/orchestrator/logic/command_applier.go +++ b/go/vt/orchestrator/logic/command_applier.go @@ -18,10 +18,11 @@ package logic import ( "encoding/json" + "fmt" - "vitess.io/vitess/go/vt/orchestrator/inst" + "vitess.io/vitess/go/vt/log" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "vitess.io/vitess/go/vt/orchestrator/inst" ) // AsyncRequest represents an entry in the async_request table @@ -78,7 +79,9 @@ func (applier *CommandApplier) ApplyCommand(op string, value []byte) any { case "set-cluster-alias-manual-override": return applier.setClusterAliasManualOverride(value) } - return log.Errorf("Unknown command op: %s", op) + errMsg := fmt.Sprintf("Unknown command op: %s", op) + log.Errorf(errMsg) + return fmt.Errorf(errMsg) } func (applier *CommandApplier) registerNode(value []byte) any { @@ -88,7 +91,8 @@ func (applier *CommandApplier) registerNode(value []byte) any { func (applier *CommandApplier) discover(value []byte) any { instanceKey := inst.InstanceKey{} if err := json.Unmarshal(value, &instanceKey); err != nil { - return log.Errore(err) + log.Error(err) + return err } DiscoverInstance(instanceKey, false /* forceDiscovery */) return nil @@ -97,7 +101,8 @@ func (applier *CommandApplier) discover(value []byte) any { func (applier *CommandApplier) forget(value []byte) any { instanceKey := inst.InstanceKey{} if err := json.Unmarshal(value, &instanceKey); err != nil { - return log.Errore(err) + log.Error(err) + return err } err := inst.ForgetInstance(&instanceKey) return err @@ -106,7 +111,8 @@ func (applier *CommandApplier) forget(value []byte) any { func (applier *CommandApplier) forgetCluster(value []byte) any { var clusterName string if err := json.Unmarshal(value, &clusterName); err != nil { - 
return log.Errore(err) + log.Error(err) + return err } err := inst.ForgetCluster(clusterName) return err @@ -115,7 +121,8 @@ func (applier *CommandApplier) forgetCluster(value []byte) any { func (applier *CommandApplier) beginDowntime(value []byte) any { downtime := inst.Downtime{} if err := json.Unmarshal(value, &downtime); err != nil { - return log.Errore(err) + log.Error(err) + return err } err := inst.BeginDowntime(&downtime) return err @@ -124,7 +131,8 @@ func (applier *CommandApplier) beginDowntime(value []byte) any { func (applier *CommandApplier) endDowntime(value []byte) any { instanceKey := inst.InstanceKey{} if err := json.Unmarshal(value, &instanceKey); err != nil { - return log.Errore(err) + log.Error(err) + return err } _, err := inst.EndDowntime(&instanceKey) return err @@ -133,7 +141,8 @@ func (applier *CommandApplier) endDowntime(value []byte) any { func (applier *CommandApplier) registerCandidate(value []byte) any { candidate := inst.CandidateDatabaseInstance{} if err := json.Unmarshal(value, &candidate); err != nil { - return log.Errore(err) + log.Error(err) + return err } err := inst.RegisterCandidateInstance(&candidate) return err @@ -143,7 +152,8 @@ func (applier *CommandApplier) ackRecovery(value []byte) any { ack := RecoveryAcknowledgement{} err := json.Unmarshal(value, &ack) if err != nil { - return log.Errore(err) + log.Error(err) + return err } if ack.AllRecoveries { _, err = AcknowledgeAllRecoveries(ack.Owner, ack.Comment) @@ -166,7 +176,8 @@ func (applier *CommandApplier) ackRecovery(value []byte) any { func (applier *CommandApplier) registerHostnameUnresolve(value []byte) any { registration := inst.HostnameRegistration{} if err := json.Unmarshal(value, ®istration); err != nil { - return log.Errore(err) + log.Error(err) + return err } err := inst.RegisterHostnameUnresolve(®istration) return err @@ -175,7 +186,8 @@ func (applier *CommandApplier) registerHostnameUnresolve(value []byte) any { func (applier *CommandApplier) submitPoolInstances(value []byte) any { submission := inst.PoolInstancesSubmission{} if err := json.Unmarshal(value, &submission); err != nil { - return log.Errore(err) + log.Error(err) + return err } err := inst.ApplyPoolInstances(&submission) return err @@ -184,7 +196,8 @@ func (applier *CommandApplier) submitPoolInstances(value []byte) any { func (applier *CommandApplier) registerFailureDetection(value []byte) any { analysisEntry := inst.ReplicationAnalysis{} if err := json.Unmarshal(value, &analysisEntry); err != nil { - return log.Errore(err) + log.Error(err) + return err } _, err := AttemptFailureDetectionRegistration(&analysisEntry) return err @@ -193,7 +206,8 @@ func (applier *CommandApplier) registerFailureDetection(value []byte) any { func (applier *CommandApplier) writeRecovery(value []byte) any { topologyRecovery := TopologyRecovery{} if err := json.Unmarshal(value, &topologyRecovery); err != nil { - return log.Errore(err) + log.Error(err) + return err } if _, err := writeTopologyRecovery(&topologyRecovery); err != nil { return err @@ -204,7 +218,8 @@ func (applier *CommandApplier) writeRecovery(value []byte) any { func (applier *CommandApplier) writeRecoveryStep(value []byte) any { topologyRecoveryStep := TopologyRecoveryStep{} if err := json.Unmarshal(value, &topologyRecoveryStep); err != nil { - return log.Errore(err) + log.Error(err) + return err } err := writeTopologyRecoveryStep(&topologyRecoveryStep) return err @@ -213,10 +228,12 @@ func (applier *CommandApplier) writeRecoveryStep(value []byte) any { func (applier 
*CommandApplier) resolveRecovery(value []byte) any { topologyRecovery := TopologyRecovery{} if err := json.Unmarshal(value, &topologyRecovery); err != nil { - return log.Errore(err) + log.Error(err) + return err } if err := writeResolveRecovery(&topologyRecovery); err != nil { - return log.Errore(err) + log.Error(err) + return err } return nil } @@ -234,7 +251,8 @@ func (applier *CommandApplier) enableGlobalRecoveries(value []byte) any { func (applier *CommandApplier) putInstanceTag(value []byte) any { instanceTag := inst.InstanceTag{} if err := json.Unmarshal(value, &instanceTag); err != nil { - return log.Errore(err) + log.Error(err) + return err } err := inst.PutInstanceTag(&instanceTag.Key, &instanceTag.T) return err @@ -243,7 +261,8 @@ func (applier *CommandApplier) putInstanceTag(value []byte) any { func (applier *CommandApplier) deleteInstanceTag(value []byte) any { instanceTag := inst.InstanceTag{} if err := json.Unmarshal(value, &instanceTag); err != nil { - return log.Errore(err) + log.Error(err) + return err } _, err := inst.Untag(&instanceTag.Key, &instanceTag.T) return err @@ -252,7 +271,8 @@ func (applier *CommandApplier) deleteInstanceTag(value []byte) any { func (applier *CommandApplier) setClusterAliasManualOverride(value []byte) any { var params [2]string if err := json.Unmarshal(value, ¶ms); err != nil { - return log.Errore(err) + log.Error(err) + return err } clusterName, alias := params[0], params[1] err := inst.SetClusterAliasManualOverride(clusterName, alias) diff --git a/go/vt/orchestrator/logic/disable_recovery.go b/go/vt/orchestrator/logic/disable_recovery.go index 384201ef7ed..976571dc76f 100644 --- a/go/vt/orchestrator/logic/disable_recovery.go +++ b/go/vt/orchestrator/logic/disable_recovery.go @@ -30,8 +30,10 @@ package logic // go to the database each time. 
import ( + "fmt" + + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) @@ -51,7 +53,9 @@ func IsRecoveryDisabled() (disabled bool, err error) { return nil }) if err != nil { - err = log.Errorf("recovery.IsRecoveryDisabled(): %v", err) + errMsg := fmt.Sprintf("recovery.IsRecoveryDisabled(): %v", err) + log.Errorf(errMsg) + err = fmt.Errorf(errMsg) } return disabled, err } @@ -76,10 +80,3 @@ func EnableRecovery() error { ) return err } - -func SetRecoveryDisabled(disabled bool) error { - if disabled { - return DisableRecovery() - } - return EnableRecovery() -} diff --git a/go/vt/orchestrator/logic/keyspace_discovery.go b/go/vt/orchestrator/logic/keyspace_discovery.go index c377378c7be..5413028e329 100644 --- a/go/vt/orchestrator/logic/keyspace_discovery.go +++ b/go/vt/orchestrator/logic/keyspace_discovery.go @@ -22,7 +22,8 @@ import ( "strings" "sync" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/inst" "vitess.io/vitess/go/vt/topo" ) @@ -37,7 +38,7 @@ func RefreshAllKeyspaces() { // Get all the keyspaces keyspaces, err = ts.GetKeyspaces(ctx) if err != nil { - log.Errore(err) + log.Error(err) return } } else { @@ -92,12 +93,12 @@ func RefreshKeyspace(keyspaceName string) error { func refreshKeyspace(ctx context.Context, keyspaceName string) error { keyspaceInfo, err := ts.GetKeyspace(ctx, keyspaceName) if err != nil { - log.Errore(err) + log.Error(err) return err } err = inst.SaveKeyspace(keyspaceInfo) if err != nil { - log.Errore(err) + log.Error(err) } return err } diff --git a/go/vt/orchestrator/logic/orchestrator.go b/go/vt/orchestrator/logic/orchestrator.go index 737d5c46da4..78c1f9b5bad 100644 --- a/go/vt/orchestrator/logic/orchestrator.go +++ b/go/vt/orchestrator/logic/orchestrator.go @@ -24,6 +24,8 @@ import ( "syscall" "time" + "vitess.io/vitess/go/vt/log" + "github.com/patrickmn/go-cache" "github.com/rcrowley/go-metrics" "github.com/sjmudd/stopwatch" @@ -31,7 +33,6 @@ import ( "vitess.io/vitess/go/vt/orchestrator/collection" "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/discovery" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/inst" ometrics "vitess.io/vitess/go/vt/orchestrator/metrics" "vitess.io/vitess/go/vt/orchestrator/process" @@ -160,7 +161,7 @@ func handleDiscoveryRequests() { // Possibly this used to be the elected node, but has // been demoted, while still the queue is full. if !IsLeaderOrActive() { - log.Debugf("Node apparently demoted. Skipping discovery of %+v. "+ + log.Infof("Node apparently demoted. Skipping discovery of %+v. "+ "Remaining queue size: %+v", instanceKey, discoveryQueue.QueueLen()) discoveryQueue.Release(instanceKey) continue @@ -178,11 +179,11 @@ func handleDiscoveryRequests() { // replicas (if any) are also checked. 
func DiscoverInstance(instanceKey inst.InstanceKey, forceDiscovery bool) { if inst.InstanceIsForgotten(&instanceKey) { - log.Debugf("discoverInstance: skipping discovery of %+v because it is set to be forgotten", instanceKey) + log.Infof("discoverInstance: skipping discovery of %+v because it is set to be forgotten", instanceKey) return } if inst.RegexpMatchPatterns(instanceKey.StringCode(), config.Config.DiscoveryIgnoreHostnameFilters) { - log.Debugf("discoverInstance: skipping discovery of %+v because it matches DiscoveryIgnoreHostnameFilters", instanceKey) + log.Infof("discoverInstance: skipping discovery of %+v because it matches DiscoveryIgnoreHostnameFilters", instanceKey) return } @@ -272,7 +273,7 @@ func onHealthTick() { { myIsElectedNode, err := process.AttemptElection() if err != nil { - log.Errore(err) + log.Error(err) } if myIsElectedNode { atomic.StoreInt64(&isElectedNode, 1) @@ -292,7 +293,7 @@ func onHealthTick() { } instanceKeys, err := inst.ReadOutdatedInstanceKeys() if err != nil { - log.Errore(err) + log.Error(err) } if !wasAlreadyElected { @@ -440,7 +441,7 @@ func ContinuousDiscovery() { if runCheckAndRecoverOperationsTimeRipe() { CheckAndRecover(nil, nil, false) } else { - log.Debugf("Waiting for %+v seconds to pass before running failure detection/recovery", checkAndRecoverWaitPeriod.Seconds()) + log.Infof("Waiting for %+v seconds to pass before running failure detection/recovery", checkAndRecoverWaitPeriod.Seconds()) } }() } diff --git a/go/vt/orchestrator/logic/tablet_discovery.go b/go/vt/orchestrator/logic/tablet_discovery.go index e2e66d06a61..3937ab253bb 100644 --- a/go/vt/orchestrator/logic/tablet_discovery.go +++ b/go/vt/orchestrator/logic/tablet_discovery.go @@ -26,13 +26,14 @@ import ( "sync/atomic" "time" + "vitess.io/vitess/go/vt/log" + "google.golang.org/protobuf/encoding/prototext" "google.golang.org/protobuf/proto" "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" "vitess.io/vitess/go/vt/orchestrator/inst" topodatapb "vitess.io/vitess/go/vt/proto/topodata" @@ -59,7 +60,7 @@ func OpenTabletDiscovery() <-chan time.Time { tmc = tmclient.NewTabletManagerClient() // Clear existing cache and perform a new refresh. 
if _, err := db.ExecOrchestrator("delete from vitess_tablet"); err != nil { - log.Errore(err) + log.Error(err) } refreshTabletsUsing(func(instanceKey *inst.InstanceKey) { _ = inst.InjectSeed(instanceKey) @@ -84,7 +85,7 @@ func refreshTabletsUsing(loader func(instanceKey *inst.InstanceKey), forceRefres defer cancel() cells, err := ts.GetKnownCells(ctx) if err != nil { - log.Errore(err) + log.Error(err) return } @@ -187,14 +188,14 @@ func refreshTablets(tablets map[string]*topo.TabletInfo, query string, args []an latestInstances[instanceKey] = true old, err := inst.ReadTablet(instanceKey) if err != nil && err != inst.ErrTabletAliasNil { - log.Errore(err) + log.Error(err) continue } if !forceRefresh && proto.Equal(tablet, old) { continue } if err := inst.SaveTablet(tablet); err != nil { - log.Errore(err) + log.Error(err) continue } loader(&instanceKey) @@ -211,7 +212,7 @@ func refreshTablets(tablets map[string]*topo.TabletInfo, query string, args []an if !latestInstances[curKey] { tablet := &topodatapb.Tablet{} if err := prototext.Unmarshal([]byte(row.GetString("info")), tablet); err != nil { - log.Errore(err) + log.Error(err) return nil } toForget[curKey] = tablet @@ -219,7 +220,7 @@ func refreshTablets(tablets map[string]*topo.TabletInfo, query string, args []an return nil }) if err != nil { - log.Errore(err) + log.Error(err) } for instanceKey, tablet := range toForget { log.Infof("Forgetting: %v", tablet) @@ -232,10 +233,10 @@ func refreshTablets(tablets map[string]*topo.TabletInfo, query string, args []an instanceKey.Port, ) if err != nil { - log.Errore(err) + log.Error(err) } if err := inst.ForgetInstance(&instanceKey); err != nil { - log.Errore(err) + log.Error(err) } } } @@ -269,24 +270,6 @@ func LockShard(ctx context.Context, instanceKey inst.InstanceKey) (context.Conte }, nil } -// TabletRefresh refreshes the tablet info. -func TabletRefresh(instanceKey inst.InstanceKey) (*topodatapb.Tablet, error) { - tablet, err := inst.ReadTablet(instanceKey) - if err != nil { - return nil, err - } - ctx, cancel := context.WithTimeout(context.Background(), *topo.RemoteOperationTimeout) - defer cancel() - ti, err := ts.GetTablet(ctx, tablet.Alias) - if err != nil { - return nil, err - } - if err := inst.SaveTablet(ti.Tablet); err != nil { - return nil, err - } - return ti.Tablet, nil -} - // tabletUndoDemotePrimary calls the said RPC for the given tablet. 
func tabletUndoDemotePrimary(ctx context.Context, tablet *topodatapb.Tablet, semiSync bool) error { return tmc.UndoDemotePrimary(ctx, tablet, semiSync) diff --git a/go/vt/orchestrator/logic/topology_recovery.go b/go/vt/orchestrator/logic/topology_recovery.go index d4e97d2fb05..37165dd0aa5 100644 --- a/go/vt/orchestrator/logic/topology_recovery.go +++ b/go/vt/orchestrator/logic/topology_recovery.go @@ -22,11 +22,12 @@ import ( "fmt" "math/rand" goos "os" - "sort" "strings" "sync/atomic" "time" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/topo/topoproto" "github.com/patrickmn/go-cache" @@ -38,7 +39,6 @@ import ( "vitess.io/vitess/go/vt/logutil" "vitess.io/vitess/go/vt/orchestrator/attributes" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/inst" ometrics "vitess.io/vitess/go/vt/orchestrator/metrics" "vitess.io/vitess/go/vt/orchestrator/os" @@ -94,14 +94,6 @@ func NewRecoveryAcknowledgement(owner string, comment string) *RecoveryAcknowled } } -func NewInternalAcknowledgement() *RecoveryAcknowledgement { - return &RecoveryAcknowledgement{ - CreatedAt: time.Now(), - Owner: "orchestrator", - Comment: "internal", - } -} - // BlockedTopologyRecovery represents an entry in the blocked_topology_recovery table type BlockedTopologyRecovery struct { FailedInstanceKey inst.InstanceKey @@ -393,16 +385,6 @@ func executeProcesses(processes []string, description string, topologyRecovery * return err } -func GetPrimaryRecoveryType(analysisEntry *inst.ReplicationAnalysis) (primaryRecoveryType PrimaryRecoveryType) { - primaryRecoveryType = PrimaryRecoveryUnknown - if analysisEntry.OracleGTIDImmediateTopology || analysisEntry.MariaDBGTIDImmediateTopology { - primaryRecoveryType = PrimaryRecoveryGTID - } else if analysisEntry.BinlogServerImmediateTopology { - primaryRecoveryType = PrimaryRecoveryBinlogServer - } - return primaryRecoveryType -} - func PrimaryFailoverGeographicConstraintSatisfied(analysisEntry *inst.ReplicationAnalysis, suggestedInstance *inst.Instance) (satisfied bool, dissatisfiedReason string) { if config.Config.PreventCrossDataCenterPrimaryFailover { if suggestedInstance.DataCenter != analysisEntry.AnalyzedInstanceDataCenter { @@ -577,55 +559,6 @@ func SuggestReplacementForPromotedReplica(topologyRecovery *TopologyRecovery, de return replacement, true, err } -// replacePromotedReplicaWithCandidate is called after a primary (or co-primary) -// died and was replaced by some promotedReplica. -// But, is there an even better replica to promote? -// if candidateInstanceKey is given, then it is forced to be promoted over the promotedReplica -// Otherwise, search for the best to promote! 
-func replacePromotedReplicaWithCandidate(topologyRecovery *TopologyRecovery, deadInstanceKey *inst.InstanceKey, promotedReplica *inst.Instance, candidateInstanceKey *inst.InstanceKey) (*inst.Instance, error) { - candidateInstance, actionRequired, err := SuggestReplacementForPromotedReplica(topologyRecovery, deadInstanceKey, promotedReplica, candidateInstanceKey) - if err != nil { - return promotedReplica, log.Errore(err) - } - if !actionRequired { - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("replace-promoted-replica-with-candidate: promoted instance %+v requires no further action", promotedReplica.Key)) - return promotedReplica, nil - } - - // Try and promote suggested candidate, if applicable and possible - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("replace-promoted-replica-with-candidate: promoted instance %+v is not the suggested candidate %+v. Will see what can be done", promotedReplica.Key, candidateInstance.Key)) - - if candidateInstance.SourceKey.Equals(&promotedReplica.Key) { - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("replace-promoted-replica-with-candidate: suggested candidate %+v is replica of promoted instance %+v. Will try and take its primary", candidateInstance.Key, promotedReplica.Key)) - candidateInstance, err = inst.TakePrimary(&candidateInstance.Key, topologyRecovery.Type == CoPrimaryRecovery) - if err != nil { - return promotedReplica, log.Errore(err) - } - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("success promoting %+v over %+v", candidateInstance.Key, promotedReplica.Key)) - - // As followup to taking over, let's relocate all the rest of the replicas under the candidate instance - relocateReplicasFunc := func() error { - log.Debugf("replace-promoted-replica-with-candidate: relocating replicas of %+v below %+v", promotedReplica.Key, candidateInstance.Key) - - relocatedReplicas, _, _, err := inst.RelocateReplicas(&promotedReplica.Key, &candidateInstance.Key, "") - log.Debugf("replace-promoted-replica-with-candidate: + relocated %+v replicas of %+v below %+v", len(relocatedReplicas), promotedReplica.Key, candidateInstance.Key) - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("relocated %+v replicas of %+v below %+v", len(relocatedReplicas), promotedReplica.Key, candidateInstance.Key)) - return log.Errore(err) - } - postponedFunctionsContainer := &topologyRecovery.PostponedFunctionsContainer - if postponedFunctionsContainer != nil { - postponedFunctionsContainer.AddPostponedFunction(relocateReplicasFunc, fmt.Sprintf("replace-promoted-replica-with-candidate: relocate replicas of %+v", promotedReplica.Key)) - } else { - _ = relocateReplicasFunc() - // We do not propagate the error. 
It is logged, but otherwise should not fail the entire failover operation - } - return candidateInstance, nil - } - - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("could not manage to promoted suggested candidate %+v", candidateInstance.Key)) - return promotedReplica, nil -} - // recoverPrimaryHasPrimary resets the replication on the primary instance func recoverPrimaryHasPrimary(ctx context.Context, analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *inst.InstanceKey, forceInstanceRecovery bool, skipProcesses bool) (recoveryAttempted bool, topologyRecovery *TopologyRecovery, err error) { topologyRecovery, err = AttemptRecoveryRegistration(&analysisEntry, false, true) @@ -754,49 +687,6 @@ func postErsCompletion(topologyRecovery *TopologyRecovery, analysisEntry inst.Re } } -// isGenerallyValidAsCandidateSiblingOfIntermediatePrimary sees that basic server configuration and state are valid -func isGenerallyValidAsCandidateSiblingOfIntermediatePrimary(sibling *inst.Instance) bool { - if !sibling.LogBinEnabled { - return false - } - if !sibling.LogReplicationUpdatesEnabled { - return false - } - if !sibling.ReplicaRunning() { - return false - } - if !sibling.IsLastCheckValid { - return false - } - return true -} - -// isValidAsCandidateSiblingOfIntermediatePrimary checks to see that the given sibling is capable to take over instance's replicas -func isValidAsCandidateSiblingOfIntermediatePrimary(intermediatePrimaryInstance *inst.Instance, sibling *inst.Instance) bool { - if sibling.Key.Equals(&intermediatePrimaryInstance.Key) { - // same instance - return false - } - if !isGenerallyValidAsCandidateSiblingOfIntermediatePrimary(sibling) { - return false - } - if inst.IsBannedFromBeingCandidateReplica(sibling) { - return false - } - if sibling.HasReplicationFilters != intermediatePrimaryInstance.HasReplicationFilters { - return false - } - if sibling.IsBinlogServer() != intermediatePrimaryInstance.IsBinlogServer() { - // When both are binlog servers, failover is trivial. - // When failed IM is binlog server, its sibling is still valid, but we catually prefer to just repoint the replica up -- simplest! - return false - } - if sibling.ExecBinlogCoordinates.SmallerThan(&intermediatePrimaryInstance.ExecBinlogCoordinates) { - return false - } - return true -} - func isGenerallyValidAsWouldBePrimary(replica *inst.Instance, requireLogReplicationUpdates bool) bool { if !replica.IsLastCheckValid { // something wrong with this replica right now. We shouldn't hope to be able to promote it @@ -831,288 +721,6 @@ func canTakeOverPromotedServerAsPrimary(wantToTakeOver *inst.Instance, toBeTaken return true } -// GetCandidateSiblingOfIntermediatePrimary chooses the best sibling of a dead intermediate primary -// to whom the IM's replicas can be moved. -func GetCandidateSiblingOfIntermediatePrimary(topologyRecovery *TopologyRecovery, intermediatePrimaryInstance *inst.Instance) (*inst.Instance, error) { - - siblings, err := inst.ReadReplicaInstances(&intermediatePrimaryInstance.SourceKey) - if err != nil { - return nil, err - } - if len(siblings) <= 1 { - return nil, log.Errorf("topology_recovery: no siblings found for %+v", intermediatePrimaryInstance.Key) - } - - sort.Sort(sort.Reverse(InstancesByCountReplicas(siblings))) - - // In the next series of steps we attempt to return a good replacement. - // None of the below attempts is sure to pick a winning server. Perhaps picked server is not enough up-todate -- but - // this has small likelihood in the general case, and, well, it's an attempt. 
It's a Plan A, but we have Plan B & C if this fails. - - // At first, we try to return an "is_candidate" server in same dc & env - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("searching for the best candidate sibling of dead intermediate primary %+v", intermediatePrimaryInstance.Key)) - for _, sibling := range siblings { - sibling := sibling - if isValidAsCandidateSiblingOfIntermediatePrimary(intermediatePrimaryInstance, sibling) && - sibling.IsCandidate && - sibling.DataCenter == intermediatePrimaryInstance.DataCenter && - sibling.PhysicalEnvironment == intermediatePrimaryInstance.PhysicalEnvironment { - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("found %+v as the ideal candidate", sibling.Key)) - return sibling, nil - } - } - // No candidate in same DC & env, let's search for a candidate anywhere - for _, sibling := range siblings { - sibling := sibling - if isValidAsCandidateSiblingOfIntermediatePrimary(intermediatePrimaryInstance, sibling) && sibling.IsCandidate { - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("found %+v as a replacement for %+v [candidate sibling]", sibling.Key, intermediatePrimaryInstance.Key)) - return sibling, nil - } - } - // Go for some valid in the same DC & ENV - for _, sibling := range siblings { - sibling := sibling - if isValidAsCandidateSiblingOfIntermediatePrimary(intermediatePrimaryInstance, sibling) && - sibling.DataCenter == intermediatePrimaryInstance.DataCenter && - sibling.PhysicalEnvironment == intermediatePrimaryInstance.PhysicalEnvironment { - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("found %+v as a replacement for %+v [same dc & environment]", sibling.Key, intermediatePrimaryInstance.Key)) - return sibling, nil - } - } - // Just whatever is valid. - for _, sibling := range siblings { - sibling := sibling - if isValidAsCandidateSiblingOfIntermediatePrimary(intermediatePrimaryInstance, sibling) { - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("found %+v as a replacement for %+v [any sibling]", sibling.Key, intermediatePrimaryInstance.Key)) - return sibling, nil - } - } - return nil, log.Errorf("topology_recovery: cannot find candidate sibling of %+v", intermediatePrimaryInstance.Key) -} - -// RecoverDeadIntermediatePrimary performs intermediate primary recovery; complete logic inside -func RecoverDeadIntermediatePrimary(topologyRecovery *TopologyRecovery, skipProcesses bool) (successorInstance *inst.Instance, err error) { - topologyRecovery.Type = IntermediatePrimaryRecovery - analysisEntry := &topologyRecovery.AnalysisEntry - failedInstanceKey := &analysisEntry.AnalyzedInstanceKey - recoveryResolved := false - - inst.AuditOperation("recover-dead-intermediate-primary", failedInstanceKey, "problem found; will recover") - if !skipProcesses { - if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", topologyRecovery, true); err != nil { - return nil, topologyRecovery.AddError(err) - } - } - - intermediatePrimaryInstance, _, err := inst.ReadInstance(failedInstanceKey) - if err != nil { - return nil, topologyRecovery.AddError(err) - } - // Find possible candidate - candidateSiblingOfIntermediatePrimary, _ := GetCandidateSiblingOfIntermediatePrimary(topologyRecovery, intermediatePrimaryInstance) - relocateReplicasToCandidateSibling := func() { - if candidateSiblingOfIntermediatePrimary == nil { - return - } - // We have a candidate - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadIntermediatePrimary: will attempt a candidate intermediate primary: %+v", 
candidateSiblingOfIntermediatePrimary.Key)) - relocatedReplicas, candidateSibling, errs, err := inst.RelocateReplicas(failedInstanceKey, &candidateSiblingOfIntermediatePrimary.Key, "") - topologyRecovery.AddErrors(errs) - topologyRecovery.ParticipatingInstanceKeys.AddKey(candidateSiblingOfIntermediatePrimary.Key) - - if len(relocatedReplicas) == 0 { - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadIntermediatePrimary: failed to move any replica to candidate intermediate primary (%+v)", candidateSibling.Key)) - return - } - if err != nil || len(errs) > 0 { - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadIntermediatePrimary: move to candidate intermediate primary (%+v) did not complete: err: %+v, errs: %+v", candidateSibling.Key, err, errs)) - return - } - if err == nil { - recoveryResolved = true - successorInstance = candidateSibling - - inst.AuditOperation("recover-dead-intermediate-primary", failedInstanceKey, fmt.Sprintf("Relocated %d replicas under candidate sibling: %+v; %d errors: %+v", len(relocatedReplicas), candidateSibling.Key, len(errs), errs)) - } - } - // Plan A: find a replacement intermediate primary in same Data Center - if candidateSiblingOfIntermediatePrimary != nil && candidateSiblingOfIntermediatePrimary.DataCenter == intermediatePrimaryInstance.DataCenter { - relocateReplicasToCandidateSibling() - } - if !recoveryResolved { - AuditTopologyRecovery(topologyRecovery, "- RecoverDeadIntermediatePrimary: will next attempt regrouping of replicas") - // Plan B: regroup (we wish to reduce cross-DC replication streams) - lostReplicas, _, _, _, regroupPromotedReplica, regroupError := inst.RegroupReplicas(failedInstanceKey, true, nil, nil) - if regroupError != nil { - topologyRecovery.AddError(regroupError) - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadIntermediatePrimary: regroup failed on: %+v", regroupError)) - } - if regroupPromotedReplica != nil { - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadIntermediatePrimary: regrouped under %+v, with %d lost replicas", regroupPromotedReplica.Key, len(lostReplicas))) - topologyRecovery.ParticipatingInstanceKeys.AddKey(regroupPromotedReplica.Key) - if len(lostReplicas) == 0 && regroupError == nil { - // Seems like the regroup worked flawlessly. The local replica took over all of its siblings. - // We can consider this host to be the successor. - successorInstance = regroupPromotedReplica - } - } - // Plan C: try replacement intermediate primary in other DC... - if candidateSiblingOfIntermediatePrimary != nil && candidateSiblingOfIntermediatePrimary.DataCenter != intermediatePrimaryInstance.DataCenter { - AuditTopologyRecovery(topologyRecovery, "- RecoverDeadIntermediatePrimary: will next attempt relocating to another DC server") - relocateReplicasToCandidateSibling() - } - } - if !recoveryResolved { - // Do we still have leftovers? some replicas couldn't move? Couldn't regroup? Only left with regroup's resulting leader? - // nothing moved? - // We don't care much if regroup made it or not. We prefer that it made it, in which case we only need to relocate up - // one replica, but the operation is still valid if regroup partially/completely failed. We just promote anything - // not regrouped. 
- // So, match up all that's left, plan D - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadIntermediatePrimary: will next attempt to relocate up from %+v", *failedInstanceKey)) - - relocatedReplicas, primaryInstance, errs, _ := inst.RelocateReplicas(failedInstanceKey, &analysisEntry.AnalyzedInstancePrimaryKey, "") - topologyRecovery.AddErrors(errs) - topologyRecovery.ParticipatingInstanceKeys.AddKey(analysisEntry.AnalyzedInstancePrimaryKey) - - if len(relocatedReplicas) > 0 { - recoveryResolved = true - if successorInstance == nil { - // There could have been a local replica taking over its siblings. We'd like to consider that one as successor. - successorInstance = primaryInstance - } - inst.AuditOperation("recover-dead-intermediate-primary", failedInstanceKey, fmt.Sprintf("Relocated replicas under: %+v %d errors: %+v", successorInstance.Key, len(errs), errs)) - } else { - err = log.Errorf("topology_recovery: RecoverDeadIntermediatePrimary failed to match up any replica from %+v", *failedInstanceKey) - topologyRecovery.AddError(err) - } - } - if !recoveryResolved { - successorInstance = nil - } - resolveRecovery(topologyRecovery, successorInstance) - return successorInstance, err -} - -// RecoverDeadCoPrimary recovers a dead co-primary, complete logic inside -func RecoverDeadCoPrimary(topologyRecovery *TopologyRecovery, skipProcesses bool) (promotedReplica *inst.Instance, lostReplicas [](*inst.Instance), err error) { - topologyRecovery.Type = CoPrimaryRecovery - analysisEntry := &topologyRecovery.AnalysisEntry - failedInstanceKey := &analysisEntry.AnalyzedInstanceKey - otherCoPrimaryKey := &analysisEntry.AnalyzedInstancePrimaryKey - otherCoPrimary, found, _ := inst.ReadInstance(otherCoPrimaryKey) - if otherCoPrimary == nil || !found { - return nil, lostReplicas, topologyRecovery.AddError(log.Errorf("RecoverDeadCoPrimary: could not read info for co-primary %+v of %+v", *otherCoPrimaryKey, *failedInstanceKey)) - } - inst.AuditOperation("recover-dead-co-primary", failedInstanceKey, "problem found; will recover") - if !skipProcesses { - if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", topologyRecovery, true); err != nil { - return nil, lostReplicas, topologyRecovery.AddError(err) - } - } - - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("RecoverDeadCoPrimary: will recover %+v", *failedInstanceKey)) - - var coPrimaryRecoveryType = PrimaryRecoveryUnknown - if analysisEntry.OracleGTIDImmediateTopology || analysisEntry.MariaDBGTIDImmediateTopology { - coPrimaryRecoveryType = PrimaryRecoveryGTID - } - - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("RecoverDeadCoPrimary: coPrimaryRecoveryType=%+v", coPrimaryRecoveryType)) - - var cannotReplicateReplicas [](*inst.Instance) - switch coPrimaryRecoveryType { - case PrimaryRecoveryUnknown: - { - return nil, lostReplicas, topologyRecovery.AddError(log.Errorf("RecoverDeadCoPrimary: RecoveryType unknown/unsupported")) - } - case PrimaryRecoveryGTID: - { - lostReplicas, _, cannotReplicateReplicas, promotedReplica, err = inst.RegroupReplicasGTID(failedInstanceKey, true, nil, &topologyRecovery.PostponedFunctionsContainer, nil) - } - } - topologyRecovery.AddError(err) - lostReplicas = append(lostReplicas, cannotReplicateReplicas...) 
- - mustPromoteOtherCoPrimary := config.Config.CoPrimaryRecoveryMustPromoteOtherCoPrimary - if !otherCoPrimary.ReadOnly { - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("RecoverDeadCoPrimary: other co-primary %+v is writeable hence has to be promoted", otherCoPrimary.Key)) - mustPromoteOtherCoPrimary = true - } - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("RecoverDeadCoPrimary: mustPromoteOtherCoPrimary? %+v", mustPromoteOtherCoPrimary)) - - if promotedReplica != nil { - topologyRecovery.ParticipatingInstanceKeys.AddKey(promotedReplica.Key) - if mustPromoteOtherCoPrimary { - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("RecoverDeadCoPrimary: mustPromoteOtherCoPrimary. Verifying that %+v is/can be promoted", *otherCoPrimaryKey)) - promotedReplica, err = replacePromotedReplicaWithCandidate(topologyRecovery, failedInstanceKey, promotedReplica, otherCoPrimaryKey) - } else { - // We are allowed to promote any server - promotedReplica, err = replacePromotedReplicaWithCandidate(topologyRecovery, failedInstanceKey, promotedReplica, nil) - } - topologyRecovery.AddError(err) - } - if promotedReplica != nil { - if mustPromoteOtherCoPrimary && !promotedReplica.Key.Equals(otherCoPrimaryKey) { - topologyRecovery.AddError(log.Errorf("RecoverDeadCoPrimary: could not manage to promote other-co-primary %+v; was only able to promote %+v; mustPromoteOtherCoPrimary is true (either CoPrimaryRecoveryMustPromoteOtherCoPrimary is true, or co-primary is writeable), therefore failing", *otherCoPrimaryKey, promotedReplica.Key)) - promotedReplica = nil - } - } - if promotedReplica != nil { - if config.Config.DelayPrimaryPromotionIfSQLThreadNotUpToDate { - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("Waiting to ensure the SQL thread catches up on %+v", promotedReplica.Key)) - if _, err := inst.WaitForSQLThreadUpToDate(&promotedReplica.Key, 0, 0); err != nil { - return promotedReplica, lostReplicas, err - } - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("SQL thread caught up on %+v", promotedReplica.Key)) - } - topologyRecovery.ParticipatingInstanceKeys.AddKey(promotedReplica.Key) - } - - // OK, we may have someone promoted. Either this was the other co-primary or another replica. - // Noting down that we DO NOT attempt to set a new co-primary topology. We are good with remaining with a single primary. - // I tried solving the "let's promote a replica and create a new co-primary setup" but this turns so complex due to various factors. - // I see this as risky and not worth the questionable benefit. - // Maybe future me is a smarter person and finds a simple solution. Unlikely. I'm getting dumber. - // - // ... - // Now that we're convinved, take a look at what we can be left with: - // Say we started with M1<->M2<-S1, with M2 failing, and we promoted S1. - // We now have M1->S1 (because S1 is promoted), S1->M2 (because that's what it remembers), M2->M1 (because that's what it remembers) - // !! This is an evil 3-node circle that must be broken. - // config.Config.ApplyMySQLPromotionAfterPrimaryFailover, if true, will cause it to break, because we would RESET SLAVE on S1 - // but we want to make sure the circle is broken no matter what. 
- // So in the case we promoted not-the-other-co-primary, we issue a detach-replica-primary-host, which is a reversible operation - if promotedReplica != nil && !promotedReplica.Key.Equals(otherCoPrimaryKey) { - _, err = inst.DetachReplicaPrimaryHost(&promotedReplica.Key) - topologyRecovery.AddError(log.Errore(err)) - } - - if promotedReplica != nil && len(lostReplicas) > 0 && config.Config.DetachLostReplicasAfterPrimaryFailover { - postponedFunction := func() error { - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadCoPrimary: lost %+v replicas during recovery process; detaching them", len(lostReplicas))) - for _, replica := range lostReplicas { - replica := replica - inst.DetachReplicaPrimaryHost(&replica.Key) - } - return nil - } - topologyRecovery.AddPostponedFunction(postponedFunction, fmt.Sprintf("RecoverDeadCoPrimary, detaching %+v replicas", len(lostReplicas))) - } - - func() error { - inst.BeginDowntime(inst.NewDowntime(failedInstanceKey, inst.GetMaintenanceOwner(), inst.DowntimeLostInRecoveryMessage, time.Duration(config.LostInRecoveryDowntimeSeconds)*time.Second)) - acknowledgeInstanceFailureDetection(&analysisEntry.AnalyzedInstanceKey) - for _, replica := range lostReplicas { - replica := replica - inst.BeginDowntime(inst.NewDowntime(&replica.Key, inst.GetMaintenanceOwner(), inst.DowntimeLostInRecoveryMessage, time.Duration(config.LostInRecoveryDowntimeSeconds)*time.Second)) - } - return nil - }() - - return promotedReplica, lostReplicas, err -} - // checkAndRecoverGenericProblem is a general-purpose recovery function func checkAndRecoverLockedSemiSyncPrimary(ctx context.Context, analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *inst.InstanceKey, forceInstanceRecovery bool, skipProcesses bool) (recoveryAttempted bool, topologyRecovery *TopologyRecovery, err error) { return false, nil, nil @@ -1193,7 +801,7 @@ func emergentlyRestartReplicationOnTopologyInstanceReplicas(instanceKey *inst.In func emergentlyRecordStaleBinlogCoordinates(instanceKey *inst.InstanceKey, binlogCoordinates *inst.BinlogCoordinates) { err := inst.RecordStaleInstanceBinlogCoordinates(instanceKey, binlogCoordinates) - log.Errore(err) + log.Error(err) } // checkAndExecuteFailureDetectionProcesses tries to register for failure detection and potentially executes @@ -1438,7 +1046,8 @@ func CheckAndRecover(specificInstance *inst.InstanceKey, candidateInstanceKey *i // Allow the analysis to run even if we don't want to recover replicationAnalysis, err := inst.GetReplicationAnalysis("", &inst.ReplicationAnalysisHints{IncludeDowntimed: true, AuditAnalysis: true}) if err != nil { - return false, nil, log.Errore(err) + log.Error(err) + return false, nil, err } if *config.RuntimeCLIFlags.Noop { log.Infof("--noop provided; will not execute processes") @@ -1462,14 +1071,14 @@ func CheckAndRecover(specificInstance *inst.InstanceKey, candidateInstanceKey *i // force mode. 
Keep it synchronuous var topologyRecovery *TopologyRecovery recoveryAttempted, topologyRecovery, err = executeCheckAndRecoverFunction(analysisEntry, candidateInstanceKey, true, skipProcesses) - log.Errore(err) + log.Error(err) if topologyRecovery != nil { promotedReplicaKey = topologyRecovery.SuccessorKey } } else { go func() { _, _, err := executeCheckAndRecoverFunction(analysisEntry, candidateInstanceKey, false, skipProcesses) - log.Errore(err) + log.Error(err) }() } } diff --git a/go/vt/orchestrator/logic/topology_recovery_dao.go b/go/vt/orchestrator/logic/topology_recovery_dao.go index 63e7d168304..0e9327a4070 100644 --- a/go/vt/orchestrator/logic/topology_recovery_dao.go +++ b/go/vt/orchestrator/logic/topology_recovery_dao.go @@ -20,9 +20,10 @@ import ( "fmt" "strings" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" "vitess.io/vitess/go/vt/orchestrator/inst" "vitess.io/vitess/go/vt/orchestrator/process" @@ -84,11 +85,13 @@ func AttemptFailureDetectionRegistration(analysisEntry *inst.ReplicationAnalysis sqlResult, err := db.ExecOrchestrator(query, args...) if err != nil { - return false, log.Errore(err) + log.Error(err) + return false, err } rows, err := sqlResult.RowsAffected() if err != nil { - return false, log.Errore(err) + log.Error(err) + return false, err } return (rows > 0), nil } @@ -106,7 +109,8 @@ func ClearActiveFailureDetections() error { `, config.Config.FailureDetectionPeriodBlockMinutes, ) - return log.Errore(err) + log.Error(err) + return err } // clearAcknowledgedFailureDetections clears the "in_active_period" flag for detections @@ -121,19 +125,8 @@ func clearAcknowledgedFailureDetections(whereClause string, args []any) error { and %s `, whereClause) _, err := db.ExecOrchestrator(query, args...) - return log.Errore(err) -} - -// AcknowledgeInstanceFailureDetection clears a failure detection for a particular -// instance. This is automated by recovery process: it makes sense to acknowledge -// the detection of an instance just recovered. -func acknowledgeInstanceFailureDetection(instanceKey *inst.InstanceKey) error { - whereClause := ` - hostname = ? - and port = ? - ` - args := sqlutils.Args(instanceKey.Hostname, instanceKey.Port) - return clearAcknowledgedFailureDetections(whereClause, args) + log.Error(err) + return err } func writeTopologyRecovery(topologyRecovery *TopologyRecovery) (*TopologyRecovery, error) { @@ -209,11 +202,14 @@ func AttemptRecoveryRegistration(analysisEntry *inst.ReplicationAnalysis, failIf // If so, we reject recovery registration to avoid flapping. recoveries, err := ReadInActivePeriodSuccessorInstanceRecovery(&analysisEntry.AnalyzedInstanceKey) if err != nil { - return nil, log.Errore(err) + log.Error(err) + return nil, err } if len(recoveries) > 0 { - RegisterBlockedRecoveries(analysisEntry, recoveries) - return nil, log.Errorf("AttemptRecoveryRegistration: instance %+v has recently been promoted (by failover of %+v) and is in active period. It will not be failed over. 
You may acknowledge the failure on %+v (-c ack-instance-recoveries) to remove this blockage", analysisEntry.AnalyzedInstanceKey, recoveries[0].AnalysisEntry.AnalyzedInstanceKey, recoveries[0].AnalysisEntry.AnalyzedInstanceKey) + _ = RegisterBlockedRecoveries(analysisEntry, recoveries) + errMsg := fmt.Sprintf("AttemptRecoveryRegistration: instance %+v has recently been promoted (by failover of %+v) and is in active period. It will not be failed over. You may acknowledge the failure on %+v (-c ack-instance-recoveries) to remove this blockage", analysisEntry.AnalyzedInstanceKey, recoveries[0].AnalysisEntry.AnalyzedInstanceKey, recoveries[0].AnalysisEntry.AnalyzedInstanceKey) + log.Errorf(errMsg) + return nil, fmt.Errorf(errMsg) } } if failIfClusterInActiveRecovery { @@ -221,16 +217,19 @@ func AttemptRecoveryRegistration(analysisEntry *inst.ReplicationAnalysis, failIf // If so, we reject recovery registration to avoid flapping. recoveries, err := ReadInActivePeriodClusterRecovery(analysisEntry.ClusterDetails.ClusterName) if err != nil { - return nil, log.Errore(err) + log.Error(err) + return nil, err } if len(recoveries) > 0 { - RegisterBlockedRecoveries(analysisEntry, recoveries) - return nil, log.Errorf("AttemptRecoveryRegistration: cluster %+v has recently experienced a failover (of %+v) and is in active period. It will not be failed over again. You may acknowledge the failure on this cluster (-c ack-cluster-recoveries) or on %+v (-c ack-instance-recoveries) to remove this blockage", analysisEntry.ClusterDetails.ClusterName, recoveries[0].AnalysisEntry.AnalyzedInstanceKey, recoveries[0].AnalysisEntry.AnalyzedInstanceKey) + _ = RegisterBlockedRecoveries(analysisEntry, recoveries) + errMsg := fmt.Sprintf("AttemptRecoveryRegistration: cluster %+v has recently experienced a failover (of %+v) and is in active period. It will not be failed over again. You may acknowledge the failure on this cluster (-c ack-cluster-recoveries) or on %+v (-c ack-instance-recoveries) to remove this blockage", analysisEntry.ClusterDetails.ClusterName, recoveries[0].AnalysisEntry.AnalyzedInstanceKey, recoveries[0].AnalysisEntry.AnalyzedInstanceKey) + log.Errorf(errMsg) + return nil, fmt.Errorf(errMsg) } } if !failIfFailedInstanceInActiveRecovery { // Implicitly acknowledge this instance's possibly existing active recovery, provided they are completed. - AcknowledgeInstanceCompletedRecoveries(&analysisEntry.AnalyzedInstanceKey, "orchestrator", fmt.Sprintf("implicit acknowledge due to user invocation of recovery on same instance: %+v", analysisEntry.AnalyzedInstanceKey)) + _, _ = AcknowledgeInstanceCompletedRecoveries(&analysisEntry.AnalyzedInstanceKey, "orchestrator", fmt.Sprintf("implicit acknowledge due to user invocation of recovery on same instance: %+v", analysisEntry.AnalyzedInstanceKey)) // The fact we only acknowledge a completed recovery solves the possible case of two DBAs simultaneously // trying to recover the same instance at the same time } @@ -239,7 +238,8 @@ func AttemptRecoveryRegistration(analysisEntry *inst.ReplicationAnalysis, failIf topologyRecovery, err := writeTopologyRecovery(topologyRecovery) if err != nil { - return nil, log.Errore(err) + log.Error(err) + return nil, err } return topologyRecovery, nil } @@ -257,7 +257,8 @@ func ClearActiveRecoveries() error { `, config.Config.RecoveryPeriodBlockSeconds, ) - return log.Errore(err) + log.Error(err) + return err } // RegisterBlockedRecoveries writes down currently blocked recoveries, and indicates what recovery they are blocked on. 
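The dao hunks above all apply the same mechanical translation: golib's log.Errore(err) / log.Errorf(...) logged and returned an error in one call, while vt/log's Error / Errorf only log, so each call site now logs and returns in two explicit steps, building the message once when it is both logged and wrapped. A minimal sketch of the two shapes, assuming the vt/log and db.ExecOrchestrator helpers already used in this patch; the function names, table and message below are illustrative, not taken from the code:

package sketch

import (
	"fmt"

	"vitess.io/vitess/go/vt/log"
	"vitess.io/vitess/go/vt/orchestrator/db"
)

// expireRows shows the plain error path: log the error, then return the same value.
func expireRows() error {
	_, err := db.ExecOrchestrator(`delete from some_audit_table where expired = 1`)
	if err != nil {
		log.Error(err) // vt/log only logs; it does not hand the error back
		return err
	}
	return nil
}

// rejectBlocked shows the formatted path: build the message once, log it,
// and return it as an error (golib's log.Errorf used to do both at once).
func rejectBlocked(key string) error {
	errMsg := fmt.Sprintf("recovery on %+v is blocked", key)
	log.Errorf(errMsg)
	return fmt.Errorf(errMsg)
}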
@@ -293,7 +294,7 @@ func RegisterBlockedRecoveries(analysisEntry *inst.ReplicationAnalysis, blocking recovery.ID, ) if err != nil { - log.Errore(err) + log.Error(err) } } return nil @@ -333,12 +334,14 @@ func ExpireBlockedRecoveries() error { expiredKey.Hostname, expiredKey.Port, ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } } if err != nil { - return log.Errore(err) + log.Error(err) + return err } // Some oversampling, if a problem has not been noticed for some time (e.g. the server came up alive // before action was taken), expire it. @@ -350,7 +353,8 @@ func ExpireBlockedRecoveries() error { last_blocked_timestamp < NOW() - interval ? second `, (config.RecoveryPollSeconds * 2), ) - return log.Errore(err) + log.Error(err) + return err } // acknowledgeRecoveries sets acknowledged* details and clears the in_active_period flags from a set of entries @@ -378,10 +382,12 @@ func acknowledgeRecoveries(owner string, comment string, markEndRecovery bool, w args = append(sqlutils.Args(owner, comment), args...) sqlResult, err := db.ExecOrchestrator(query, args...) if err != nil { - return 0, log.Errore(err) + log.Error(err) + return 0, err } rows, err := sqlResult.RowsAffected() - return rows, log.Errore(err) + log.Error(err) + return rows, err } // AcknowledgeAllRecoveries acknowledges all unacknowledged recoveries. @@ -410,7 +416,7 @@ func AcknowledgeClusterRecoveries(clusterName string, owner string, comment stri { whereClause := `cluster_name = ?` args := sqlutils.Args(clusterName) - clearAcknowledgedFailureDetections(whereClause, args) + _ = clearAcknowledgedFailureDetections(whereClause, args) count, err := acknowledgeRecoveries(owner, comment, false, whereClause, args) if err != nil { return count, err @@ -421,7 +427,7 @@ func AcknowledgeClusterRecoveries(clusterName string, owner string, comment stri clusterInfo, _ := inst.ReadClusterInfo(clusterName) whereClause := `cluster_alias = ? and cluster_alias != ''` args := sqlutils.Args(clusterInfo.ClusterAlias) - clearAcknowledgedFailureDetections(whereClause, args) + _ = clearAcknowledgedFailureDetections(whereClause, args) count, err := acknowledgeRecoveries(owner, comment, false, whereClause, args) if err != nil { return count, err @@ -440,7 +446,7 @@ func AcknowledgeInstanceRecoveries(instanceKey *inst.InstanceKey, owner string, and port = ? 
` args := sqlutils.Args(instanceKey.Hostname, instanceKey.Port) - clearAcknowledgedFailureDetections(whereClause, args) + _ = clearAcknowledgedFailureDetections(whereClause, args) return acknowledgeRecoveries(owner, comment, false, whereClause, args) } @@ -492,7 +498,8 @@ func writeResolveRecovery(topologyRecovery *TopologyRecovery) error { strings.Join(topologyRecovery.AllErrors, "\n"), topologyRecovery.UID, ) - return log.Errore(err) + log.Error(err) + return err } // readRecoveries reads recovery entry/audit entries from topology_recovery @@ -552,7 +559,7 @@ func readRecoveries(whereCondition string, limit string, args []any) ([]*Topolog topologyRecovery.AnalysisEntry.ClusterDetails.ClusterName = m.GetString("cluster_name") topologyRecovery.AnalysisEntry.ClusterDetails.ClusterAlias = m.GetString("cluster_alias") topologyRecovery.AnalysisEntry.CountReplicas = m.GetUint("count_affected_replicas") - topologyRecovery.AnalysisEntry.ReadReplicaHostsFromString(m.GetString("replica_hosts")) + _ = topologyRecovery.AnalysisEntry.ReadReplicaHostsFromString(m.GetString("replica_hosts")) topologyRecovery.SuccessorKey = &inst.InstanceKey{} topologyRecovery.SuccessorKey.Hostname = m.GetString("successor_hostname") @@ -562,8 +569,8 @@ func readRecoveries(whereCondition string, limit string, args []any) ([]*Topolog topologyRecovery.AnalysisEntry.ClusterDetails.ReadRecoveryInfo() topologyRecovery.AllErrors = strings.Split(m.GetString("all_errors"), "\n") - topologyRecovery.LostReplicas.ReadCommaDelimitedList(m.GetString("lost_replicas")) - topologyRecovery.ParticipatingInstanceKeys.ReadCommaDelimitedList(m.GetString("participating_instances")) + _ = topologyRecovery.LostReplicas.ReadCommaDelimitedList(m.GetString("lost_replicas")) + _ = topologyRecovery.ParticipatingInstanceKeys.ReadCommaDelimitedList(m.GetString("participating_instances")) topologyRecovery.Acknowledged = m.GetBool("acknowledged") topologyRecovery.AcknowledgedAt = m.GetString("acknowledged_at") @@ -576,7 +583,8 @@ func readRecoveries(whereCondition string, limit string, args []any) ([]*Topolog return nil }) - return res, log.Errore(err) + log.Error(err) + return res, err } // ReadActiveRecoveries reads active recovery entry/audit entries from topology_recovery @@ -629,23 +637,6 @@ func ReadRecentlyActiveInstanceRecovery(instanceKey *inst.InstanceKey) ([]*Topol return readRecoveries(whereClause, ``, sqlutils.Args(instanceKey.Hostname, instanceKey.Port)) } -// ReadActiveRecoveries reads active recovery entry/audit entries from topology_recovery -func ReadActiveRecoveries() ([]*TopologyRecovery, error) { - return readRecoveries(` - where - in_active_period=1 - and end_recovery is null`, - ``, sqlutils.Args()) -} - -// ReadCompletedRecoveries reads completed recovery entry/audit entries from topology_recovery -func ReadCompletedRecoveries(page int) ([]*TopologyRecovery, error) { - limit := ` - limit ? 
- offset ?` - return readRecoveries(`where end_recovery is not null`, limit, sqlutils.Args(config.AuditPageSize, page*config.AuditPageSize)) -} - // ReadRecovery reads completed recovery entry/audit entries from topology_recovery func ReadRecovery(recoveryID int64) ([]*TopologyRecovery, error) { whereClause := `where recovery_id = ?` @@ -658,7 +649,7 @@ func ReadRecoveryByUID(recoveryUID string) ([]*TopologyRecovery, error) { return readRecoveries(whereClause, ``, sqlutils.Args(recoveryUID)) } -// ReadCRecoveries reads latest recovery entries from topology_recovery +// ReadRecentRecoveries reads latest recovery entries from topology_recovery func ReadRecentRecoveries(clusterName string, clusterAlias string, unacknowledgedOnly bool, page int) ([]*TopologyRecovery, error) { whereConditions := []string{} whereClause := "" @@ -724,7 +715,7 @@ func readFailureDetections(whereCondition string, limit string, args []any) ([]* failureDetection.AnalysisEntry.ClusterDetails.ClusterName = m.GetString("cluster_name") failureDetection.AnalysisEntry.ClusterDetails.ClusterAlias = m.GetString("cluster_alias") failureDetection.AnalysisEntry.CountReplicas = m.GetUint("count_affected_replicas") - failureDetection.AnalysisEntry.ReadReplicaHostsFromString(m.GetString("replica_hosts")) + _ = failureDetection.AnalysisEntry.ReadReplicaHostsFromString(m.GetString("replica_hosts")) failureDetection.AnalysisEntry.StartActivePeriod = m.GetString("start_active_period") failureDetection.RelatedRecoveryID = m.GetInt64("related_recovery_id") @@ -735,7 +726,8 @@ func readFailureDetections(whereCondition string, limit string, args []any) ([]* return nil }) - return res, log.Errore(err) + log.Error(err) + return res, err } // ReadRecentFailureDetections @@ -795,7 +787,8 @@ func ReadBlockedRecoveries(clusterName string) ([]BlockedTopologyRecovery, error return nil }) - return res, log.Errore(err) + log.Error(err) + return res, err } // writeTopologyRecoveryStep writes down a single step in a recovery process @@ -808,10 +801,12 @@ func writeTopologyRecoveryStep(topologyRecoveryStep *TopologyRecoveryStep) error `, sqlutils.NilIfZero(topologyRecoveryStep.ID), topologyRecoveryStep.RecoveryUID, topologyRecoveryStep.Message, ) if err != nil { - return log.Errore(err) + log.Error(err) + return err } topologyRecoveryStep.ID, err = sqlResult.LastInsertId() - return log.Errore(err) + log.Error(err) + return err } // ReadTopologyRecoverySteps reads recovery steps for a given recovery @@ -837,7 +832,8 @@ func ReadTopologyRecoverySteps(recoveryUID string) ([]TopologyRecoveryStep, erro res = append(res, recoveryStep) return nil }) - return res, log.Errore(err) + log.Error(err) + return res, err } // ExpireFailureDetectionHistory removes old rows from the topology_failure_detection table diff --git a/go/vt/orchestrator/os/process.go b/go/vt/orchestrator/os/process.go index 16eb8e49c90..d9b7e650128 100644 --- a/go/vt/orchestrator/os/process.go +++ b/go/vt/orchestrator/os/process.go @@ -23,12 +23,10 @@ import ( "strings" "syscall" + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" ) -var EmptyEnv []string - // CommandRun executes some text as a command. 
This is assumed to be // text that will be run by a shell so we need to write out the // command to a temporary file and then ask the shell to execute @@ -40,7 +38,8 @@ func CommandRun(commandText string, env []string, arguments ...string) error { cmd, shellScript, err := generateShellScript(commandText, env, arguments...) defer os.Remove(shellScript) if err != nil { - return log.Errore(err) + log.Error(err) + return err } var waitStatus syscall.WaitStatus @@ -55,7 +54,9 @@ func CommandRun(commandText string, env []string, arguments ...string) error { log.Errorf("CommandRun: failed. exit status %d", waitStatus.ExitStatus()) } - return log.Errore(fmt.Errorf("(%s) %s", err.Error(), cmdOutput)) + errMsg := fmt.Sprintf("(%s) %s", err.Error(), cmdOutput) + log.Error(errMsg) + return fmt.Errorf(errMsg) } // Command was successful @@ -75,7 +76,9 @@ func generateShellScript(commandText string, env []string, arguments ...string) commandBytes := []byte(commandText) tmpFile, err := os.CreateTemp("", "orchestrator-process-cmd-") if err != nil { - return nil, "", log.Errorf("generateShellScript() failed to create TempFile: %v", err.Error()) + errMsg := fmt.Sprintf("generateShellScript() failed to create TempFile: %v", err.Error()) + log.Errorf(errMsg) + return nil, "", fmt.Errorf(errMsg) } // write commandText to temporary file os.WriteFile(tmpFile.Name(), commandBytes, 0640) diff --git a/go/vt/orchestrator/os/unixcheck.go b/go/vt/orchestrator/os/unixcheck.go index 2033e5bd780..244726c486f 100644 --- a/go/vt/orchestrator/os/unixcheck.go +++ b/go/vt/orchestrator/os/unixcheck.go @@ -20,7 +20,7 @@ import ( "os/user" "strings" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "vitess.io/vitess/go/vt/log" ) // UserInGroups checks if the given username is in the given unix diff --git a/go/vt/orchestrator/process/access_token_dao.go b/go/vt/orchestrator/process/access_token_dao.go index 7e09fdc070d..0581a0eda37 100644 --- a/go/vt/orchestrator/process/access_token_dao.go +++ b/go/vt/orchestrator/process/access_token_dao.go @@ -17,9 +17,11 @@ package process import ( + "fmt" + + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" "vitess.io/vitess/go/vt/orchestrator/util" ) @@ -40,7 +42,8 @@ func GenerateAccessToken(owner string) (publicToken string, err error) { publicToken, secretToken, owner, ) if err != nil { - return publicToken, log.Errore(err) + log.Error(err) + return publicToken, err } return publicToken, nil } @@ -67,14 +70,18 @@ func AcquireAccessToken(publicToken string) (secretToken string, err error) { publicToken, config.Config.AccessTokenUseExpirySeconds, ) if err != nil { - return secretToken, log.Errore(err) + log.Error(err) + return secretToken, err } rows, err := sqlResult.RowsAffected() if err != nil { - return secretToken, log.Errore(err) + log.Error(err) + return secretToken, err } if rows == 0 { - return secretToken, log.Errorf("Cannot acquire token %s", publicToken) + errMsg := fmt.Sprintf("Cannot acquire token %s", publicToken) + log.Errorf(errMsg) + return secretToken, fmt.Errorf(errMsg) } // Seems like we made it! 
query := ` @@ -84,7 +91,8 @@ func AcquireAccessToken(publicToken string) (secretToken string, err error) { secretToken = m.GetString("secret_token") return nil }) - return secretToken, log.Errore(err) + log.Error(err) + return secretToken, err } // TokenIsValid checks to see whether a given token exists and is not outdated. @@ -106,7 +114,8 @@ func TokenIsValid(publicToken string, secretToken string) (result bool, err erro result = m.GetInt("valid_token") > 0 return nil }) - return result, log.Errore(err) + log.Error(err) + return result, err } // ExpireAccessTokens removes old, known to be uneligible tokens @@ -120,5 +129,6 @@ func ExpireAccessTokens() error { `, config.Config.AccessTokenExpiryMinutes, ) - return log.Errore(err) + log.Error(err) + return err } diff --git a/go/vt/orchestrator/process/election_dao.go b/go/vt/orchestrator/process/election_dao.go index f0aa969c3f5..b35f65fa6ec 100644 --- a/go/vt/orchestrator/process/election_dao.go +++ b/go/vt/orchestrator/process/election_dao.go @@ -17,9 +17,9 @@ package process import ( + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" "vitess.io/vitess/go/vt/orchestrator/util" ) @@ -37,11 +37,13 @@ func AttemptElection() (bool, error) { ThisHostname, util.ProcessToken.Hash, ) if err != nil { - return false, log.Errore(err) + log.Error(err) + return false, err } rows, err := sqlResult.RowsAffected() if err != nil { - return false, log.Errore(err) + log.Error(err) + return false, err } if rows > 0 { // We managed to insert a row @@ -63,11 +65,13 @@ func AttemptElection() (bool, error) { ThisHostname, util.ProcessToken.Hash, config.ActiveNodeExpireSeconds, ) if err != nil { - return false, log.Errore(err) + log.Error(err) + return false, err } rows, err := sqlResult.RowsAffected() if err != nil { - return false, log.Errore(err) + log.Error(err) + return false, err } if rows > 0 { // We managed to update a row: overtaking a previous leader @@ -87,11 +91,13 @@ func AttemptElection() (bool, error) { ThisHostname, util.ProcessToken.Hash, ) if err != nil { - return false, log.Errore(err) + log.Error(err) + return false, err } rows, err := sqlResult.RowsAffected() if err != nil { - return false, log.Errore(err) + log.Error(err) + return false, err } if rows > 0 { // Reaffirmed our own leadership @@ -112,13 +118,15 @@ func GrabElection() error { `, ThisHostname, util.ProcessToken.Hash, ) - return log.Errore(err) + log.Error(err) + return err } // Reelect clears the way for re-elections. Active node is immediately demoted. func Reelect() error { _, err := db.ExecOrchestrator(`delete from active_node where anchor = 1`) - return log.Errore(err) + log.Error(err) + return err } // ElectedNode returns the details of the elected node, as well as answering the question "is this process the elected one"? 
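For orientation in the election hunks above: AttemptElection tries, in order, to insert a leadership row, overtake a stale leader, and reaffirm its own row, treating RowsAffected() > 0 as success at each step, which is why every error branch now logs and returns separately instead of returning log.Errore(err). A condensed sketch of one such step, assuming the same db helper; the table and columns are invented for illustration, not the real schema:

package sketch

import (
	"vitess.io/vitess/go/vt/log"
	"vitess.io/vitess/go/vt/orchestrator/db"
)

// tryClaimLeadership inserts a single anchored row; only one node can succeed.
func tryClaimLeadership(hostname, token string) (bool, error) {
	sqlResult, err := db.ExecOrchestrator(`
		insert ignore into leader_claim (anchor, hostname, token, last_seen_active)
		values (1, ?, ?, now())
	`, hostname, token)
	if err != nil {
		log.Error(err)
		return false, err
	}
	rows, err := sqlResult.RowsAffected()
	if err != nil {
		log.Error(err)
		return false, err
	}
	// A row was inserted: this process holds (or keeps) leadership.
	return rows > 0, nil
}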
@@ -145,5 +153,6 @@ func ElectedNode() (node *NodeHealth, isElected bool, err error) { }) isElected = (node.Hostname == ThisHostname && node.Token == util.ProcessToken.Hash) - return node, isElected, log.Errore(err) //nolint copylocks: return copies lock value + log.Error(err) + return node, isElected, err //nolint copylocks: return copies lock value } diff --git a/go/vt/orchestrator/process/health.go b/go/vt/orchestrator/process/health.go index a20c188a9fa..808d1d89e95 100644 --- a/go/vt/orchestrator/process/health.go +++ b/go/vt/orchestrator/process/health.go @@ -21,12 +21,12 @@ import ( "sync/atomic" "time" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/util" "github.com/patrickmn/go-cache" - - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" ) var lastHealthCheckUnixNano int64 @@ -117,35 +117,21 @@ func HealthTest() (health *HealthStatus, err error) { healthy, err := RegisterNode(ThisNodeHealth) if err != nil { health.Error = err - return health, log.Errore(err) + log.Error(err) + return health, err } health.Healthy = healthy if health.ActiveNode, health.IsActiveNode, err = ElectedNode(); err != nil { health.Error = err - return health, log.Errore(err) + log.Error(err) + return health, err } health.AvailableNodes, _ = ReadAvailableNodes(true) return health, nil } -func SinceLastHealthCheck() time.Duration { - timeNano := atomic.LoadInt64(&lastHealthCheckUnixNano) - if timeNano == 0 { - return 0 - } - return time.Since(time.Unix(0, timeNano)) -} - -func SinceLastGoodHealthCheck() time.Duration { - timeNano := atomic.LoadInt64(&lastGoodHealthCheckUnixNano) - if timeNano == 0 { - return 0 - } - return time.Since(time.Unix(0, timeNano)) -} - // ContinuousRegistration will continuously update the node_health // table showing that the current process is still running. 
func ContinuousRegistration(extraInfo string, command string) { diff --git a/go/vt/orchestrator/process/health_dao.go b/go/vt/orchestrator/process/health_dao.go index 0a51418fde2..26649ba5f3d 100644 --- a/go/vt/orchestrator/process/health_dao.go +++ b/go/vt/orchestrator/process/health_dao.go @@ -19,15 +19,16 @@ package process import ( "time" + "vitess.io/vitess/go/vt/log" + "fmt" "vitess.io/vitess/go/vt/orchestrator/config" "vitess.io/vitess/go/vt/orchestrator/db" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) -// RegisterNode writes down this node in the node_health table +// WriteRegisterNode writes down this node in the node_health table func WriteRegisterNode(nodeHealth *NodeHealth) (healthy bool, err error) { timeNow := time.Now() reportedAgo := timeNow.Sub(nodeHealth.LastReported) @@ -65,11 +66,13 @@ func WriteRegisterNode(nodeHealth *NodeHealth) (healthy bool, err error) { nodeHealth.Hostname, nodeHealth.Token, ) if err != nil { - return false, log.Errore(err) + log.Error(err) + return false, err } rows, err := sqlResult.RowsAffected() if err != nil { - return false, log.Errore(err) + log.Error(err) + return false, err } if rows > 0 { return true, nil @@ -98,11 +101,13 @@ func WriteRegisterNode(nodeHealth *NodeHealth) (healthy bool, err error) { nodeHealth.AppVersion, dbBackend, ) if err != nil { - return false, log.Errore(err) + log.Error(err) + return false, err } rows, err := sqlResult.RowsAffected() if err != nil { - return false, log.Errore(err) + log.Error(err) + return false, err } if rows > 0 { return true, nil @@ -138,7 +143,8 @@ func ExpireNodesHistory() error { `, config.Config.UnseenInstanceForgetHours, ) - return log.Errore(err) + log.Error(err) + return err } func ReadAvailableNodes(onlyHTTPNodes bool) (nodes [](*NodeHealth), err error) { @@ -170,26 +176,6 @@ func ReadAvailableNodes(onlyHTTPNodes bool) (nodes [](*NodeHealth), err error) { nodes = append(nodes, nodeHealth) return nil }) - return nodes, log.Errore(err) -} - -func TokenBelongsToHealthyHTTPService(token string) (result bool, err error) { - extraInfo := string(OrchestratorExecutionHTTPMode) - - query := ` - select - token - from - node_health - where - and token = ? - and extra_info = ? - ` - - err = db.QueryOrchestrator(query, sqlutils.Args(token, extraInfo), func(m sqlutils.RowMap) error { - // Row exists? 
We're happy - result = true - return nil - }) - return result, log.Errore(err) + log.Error(err) + return nodes, err } diff --git a/go/vt/orchestrator/process/host.go b/go/vt/orchestrator/process/host.go index 8a2cfd7c729..21e3909cbdd 100644 --- a/go/vt/orchestrator/process/host.go +++ b/go/vt/orchestrator/process/host.go @@ -19,7 +19,7 @@ package process import ( "os" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "vitess.io/vitess/go/vt/log" ) var ThisHostname string diff --git a/go/vt/orchestrator/ssl/ssl.go b/go/vt/orchestrator/ssl/ssl.go index 15e8da4df88..8b368f19c74 100644 --- a/go/vt/orchestrator/ssl/ssl.go +++ b/go/vt/orchestrator/ssl/ssl.go @@ -10,11 +10,12 @@ import ( "os" "strings" + "vitess.io/vitess/go/vt/log" + "github.com/go-martini/martini" "github.com/howeyc/gopass" "vitess.io/vitess/go/vt/orchestrator/config" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" ) // Determine if a string element is in a string array @@ -75,11 +76,11 @@ func Verify(r *nethttp.Request, validOUs []string) error { } for _, chain := range r.TLS.VerifiedChains { s := chain[0].Subject.OrganizationalUnit - log.Debug("All OUs:", strings.Join(s, " ")) + log.Infof("All OUs: %s", strings.Join(s, " ")) for _, ou := range s { - log.Debug("Client presented OU:", ou) + log.Infof("Client presented OU: %s", ou) if HasString(ou, validOUs) { - log.Debug("Found valid OU:", ou) + log.Infof("Found valid OU: %s", ou) return nil } } @@ -91,7 +92,7 @@ func Verify(r *nethttp.Request, validOUs []string) error { // TODO: make this testable? func VerifyOUs(validOUs []string) martini.Handler { return func(res nethttp.ResponseWriter, req *nethttp.Request, c martini.Context) { - log.Debug("Verifying client OU") + log.Infof("Verifying client OU") if err := Verify(req, validOUs); err != nil { nethttp.Error(res, err.Error(), nethttp.StatusUnauthorized) } diff --git a/go/vt/orchestrator/test/db.go b/go/vt/orchestrator/test/db.go index ba22e9b0d1f..d7126a78a9e 100644 --- a/go/vt/orchestrator/test/db.go +++ b/go/vt/orchestrator/test/db.go @@ -19,6 +19,8 @@ package test import ( "fmt" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/db" "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" ) @@ -36,6 +38,7 @@ func NewTestDB(rowMaps [][]sqlutils.RowMap) *DB { } func (t *DB) QueryOrchestrator(query string, argsArray []any, onRow func(sqlutils.RowMap) error) error { + log.Info("test") rowMaps, err := t.getRowMapsForQuery() if err != nil { return err diff --git a/go/vt/orchestrator/util/token_test.go b/go/vt/orchestrator/util/token_test.go index 5dfa1fc084d..8c8f97cff4f 100644 --- a/go/vt/orchestrator/util/token_test.go +++ b/go/vt/orchestrator/util/token_test.go @@ -3,6 +3,8 @@ package util import ( "testing" + "vitess.io/vitess/go/vt/log" + test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests" ) @@ -17,6 +19,7 @@ func TestNewToken(t *testing.T) { } func TestNewTokenRandom(t *testing.T) { + log.Infof("test") token1 := NewToken() token2 := NewToken() diff --git a/go/vt/vtgate/planbuilder/select.go b/go/vt/vtgate/planbuilder/select.go index 24ff584615c..77ecd41be07 100644 --- a/go/vt/vtgate/planbuilder/select.go +++ b/go/vt/vtgate/planbuilder/select.go @@ -20,9 +20,9 @@ import ( "errors" "fmt" - "vitess.io/vitess/go/vt/vtgate/planbuilder/plancontext" + "vitess.io/vitess/go/vt/log" - "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + "vitess.io/vitess/go/vt/vtgate/planbuilder/plancontext" "vitess.io/vitess/go/vt/key"