diff --git a/.changes/unreleased/Changed-20241119-223336.yaml b/.changes/unreleased/Changed-20241119-223336.yaml new file mode 100644 index 0000000..b05e6c5 --- /dev/null +++ b/.changes/unreleased/Changed-20241119-223336.yaml @@ -0,0 +1,3 @@ +kind: Changed +body: Active profile now uses 'current-profile' key in yaml config, rather than 'active_profile', to comply with the docs +time: 2024-11-19T22:33:36.858984953+01:00 diff --git a/.changes/unreleased/Fixed-20241119-223223.yaml b/.changes/unreleased/Fixed-20241119-223223.yaml new file mode 100644 index 0000000..511978b --- /dev/null +++ b/.changes/unreleased/Fixed-20241119-223223.yaml @@ -0,0 +1,3 @@ +kind: Fixed +body: '''ydbops maintenance'' command could not accept nodeIds in ''--hosts'' option (e.g. --hosts=1,2)' +time: 2024-11-19T22:32:23.277140842+01:00 diff --git a/.changes/unreleased/Fixed-20241119-223739.yaml b/.changes/unreleased/Fixed-20241119-223739.yaml new file mode 100644 index 0000000..319c09b --- /dev/null +++ b/.changes/unreleased/Fixed-20241119-223739.yaml @@ -0,0 +1,3 @@ +kind: Fixed +body: '''ydbops maintenance'' subtree should now properly use filters such as ''started'', ''version'' etc.' +time: 2024-11-19T22:37:39.439538408+01:00 diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 6bd5cff..ae6f0e6 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -26,7 +26,7 @@ jobs: uses: golangci/golangci-lint-action@v6 with: version: v1.61.0 - args: --verbose --disable-all --new-from-rev=origin/master --enable wrapcheck,stylecheck,funlen,mnd,cyclop + args: --verbose --disable-all --new-from-rev=origin/master --enable stylecheck,funlen tests: name: run tests diff --git a/cmd/maintenance/complete/options.go b/cmd/maintenance/complete/options.go index 5b4679f..c3a523d 100644 --- a/cmd/maintenance/complete/options.go +++ b/cmd/maintenance/complete/options.go @@ -10,20 +10,22 @@ import ( ) type Options struct { - TaskID string - HostFQDNs []string + TaskID string + Hosts []string } func (o *Options) DefineFlags(fs *pflag.FlagSet) { - fs.StringSliceVar(&o.HostFQDNs, "hosts", []string{}, - "FQDNs of hosts with completed maintenance") + fs.StringSliceVar(&o.Hosts, "hosts", []string{}, + `FQDNs or nodeIds of hosts with completed maintenance. You can specify a list of host FQDNs or a list of node ids, + but you can not mix host FQDNs and node ids in this option. The list is comma-delimited. + E.g.: '--hosts=1,2,3' or '--hosts=fqdn1,fqdn2,fqdn3'`) fs.StringVar(&o.TaskID, "task-id", "", "ID of your maintenance task (result of `ydbops maintenance host`)") } func (o *Options) Validate() error { // TODO(shmel1k@): remove copypaste between drop, create & refresh methods. - if len(o.HostFQDNs) == 0 { + if len(o.Hosts) == 0 { return fmt.Errorf("--hosts unspecified") } if o.TaskID == "" { @@ -33,7 +35,7 @@ func (o *Options) Validate() error { } func (o *Options) Run(f cmdutil.Factory) error { - result, err := f.GetCMSClient().CompleteActions(o.TaskID, o.HostFQDNs) + result, err := f.GetCMSClient().CompleteActions(o.TaskID, o.Hosts) if err != nil { return err } diff --git a/cmd/maintenance/create/create.go b/cmd/maintenance/create/create.go index 262eb31..0941b69 100644 --- a/cmd/maintenance/create/create.go +++ b/cmd/maintenance/create/create.go @@ -5,13 +5,10 @@ import ( "github.com/ydb-platform/ydbops/pkg/cli" "github.com/ydb-platform/ydbops/pkg/cmdutil" - "github.com/ydb-platform/ydbops/pkg/rolling" ) func New(f cmdutil.Factory) *cobra.Command { - opts := &Options{ - RestartOptions: &rolling.RestartOptions{}, - } + opts := &Options{} cmd := cli.SetDefaultsOn(&cobra.Command{ Use: "create", diff --git a/cmd/maintenance/create/options.go b/cmd/maintenance/create/options.go index 96e5c68..fa2acbd 100644 --- a/cmd/maintenance/create/options.go +++ b/cmd/maintenance/create/options.go @@ -8,41 +8,131 @@ import ( "github.com/spf13/pflag" "google.golang.org/protobuf/types/known/durationpb" + "github.com/ydb-platform/ydb-go-genproto/draft/protos/Ydb_Maintenance" + + "github.com/ydb-platform/ydbops/cmd/restart" "github.com/ydb-platform/ydbops/pkg/client/cms" "github.com/ydb-platform/ydbops/pkg/cmdutil" - "github.com/ydb-platform/ydbops/pkg/rolling" + "github.com/ydb-platform/ydbops/pkg/options" + "github.com/ydb-platform/ydbops/pkg/prettyprint" + "github.com/ydb-platform/ydbops/pkg/rolling/restarters" + "github.com/ydb-platform/ydbops/pkg/utils" ) type Options struct { - *rolling.RestartOptions + options.TargetingOptions + + MaintenanceDuration int } +const ( + DefaultMaintenanceDurationSeconds = 3600 +) + func (o *Options) DefineFlags(fs *pflag.FlagSet) { - o.RestartOptions.DefineFlags(fs) + o.TargetingOptions.DefineFlags(fs) + + fs.IntVar(&o.MaintenanceDuration, "duration", DefaultMaintenanceDurationSeconds, + `CMS will release the node for maintenance for duration seconds. Any maintenance +after that would be considered a regular cluster failure`) } func (o *Options) Validate() error { - return o.RestartOptions.Validate() + if o.MaintenanceDuration < 0 { + return fmt.Errorf("specified invalid maintenance duration: %d. Must be positive", o.MaintenanceDuration) + } + + return o.TargetingOptions.Validate() +} + +func (o *Options) nodeIdsToNodes( + nodes []*Ydb_Maintenance.Node, + nodeIds []uint32, +) []*Ydb_Maintenance.Node { + targetedNodes := []*Ydb_Maintenance.Node{} + + // TODO @jorres arguments to PrepareRestarters are a dirty hack. + // We actually only need Filter component from restarters. 2 and 3 arguments + // are required in PrepareRestarters to actually perform node restarts, + // but we only use restarters in the scope of this function to filter nodes + // so their value does not matter. Splitting something like 'Filterers' from + // Restarters into separate interface should solve this. + storageRestarter, tenantRestarter := restart.PrepareRestarters( + &o.TargetingOptions, + []string{}, + "", + o.MaintenanceDuration, + ) + + filterNodeParams := restarters.FilterNodeParams{ + Version: o.VersionSpec, + SelectedTenants: o.TenantList, + SelectedNodeIds: nodeIds, + SelectedHosts: []string{}, + SelectedDatacenters: o.Datacenters, + StartedTime: o.StartedTime, + ExcludeHosts: o.ExcludeHosts, + MaxStaticNodeID: uint32(o.MaxStaticNodeID), + } + + clusterNodesInfo := restarters.ClusterNodesInfo{ + AllNodes: nodes, + TenantToNodeIds: utils.PopulateTenantToNodesMapping(nodes), + } + + targetedNodes = append(targetedNodes, storageRestarter.Filter(filterNodeParams, clusterNodesInfo)...) + targetedNodes = append(targetedNodes, tenantRestarter.Filter(filterNodeParams, clusterNodesInfo)...) + + return targetedNodes } func (o *Options) Run(f cmdutil.Factory) error { taskUID := cms.TaskUuidPrefix + uuid.New().String() - duration := time.Duration(o.RestartOptions.RestartDuration) * time.Minute - taskId, err := f.GetCMSClient().CreateMaintenanceTask(cms.MaintenanceTaskParams{ - Hosts: o.RestartOptions.Hosts, - Duration: durationpb.New(duration), - AvailabilityMode: o.RestartOptions.GetAvailabilityMode(), - ScopeType: cms.HostScope, - TaskUID: taskUID, - }) + duration := time.Duration(o.MaintenanceDuration) * time.Second + + nodes, err := f.GetCMSClient().Nodes() + if err != nil { + return err + } + nodeIds, errIds := utils.GetNodeIds(o.Hosts) + hostFQDNs, errFqdns := utils.GetNodeFQDNs(o.Hosts) + if errIds != nil && errFqdns != nil { + return fmt.Errorf( + "failed to parse --hosts argument as node ids (%w) or host fqdns (%w)", + errIds, + errFqdns, + ) + } + + var task cms.MaintenanceTask + if errIds == nil { + task, err = f.GetCMSClient().CreateMaintenanceTask(cms.MaintenanceTaskParams{ + Nodes: o.nodeIdsToNodes(nodes, nodeIds), + Duration: durationpb.New(duration), + AvailabilityMode: o.GetAvailabilityMode(), + ScopeType: cms.NodeScope, + TaskUID: taskUID, + }) + } else { + task, err = f.GetCMSClient().CreateMaintenanceTask(cms.MaintenanceTaskParams{ + Hosts: hostFQDNs, + Duration: durationpb.New(duration), + AvailabilityMode: o.GetAvailabilityMode(), + ScopeType: cms.HostScope, + TaskUID: taskUID, + }) + } + if err != nil { return err } fmt.Printf( "Your task id is:\n\n%s\n\nPlease write it down for refreshing and completing the task later.\n", - taskId.GetTaskUid(), + task.GetTaskUid(), ) + fmt.Println(prettyprint.TaskToString(task)) + return nil } diff --git a/cmd/maintenance/maintenance.go b/cmd/maintenance/maintenance.go index 9034260..397e4ef 100644 --- a/cmd/maintenance/maintenance.go +++ b/cmd/maintenance/maintenance.go @@ -10,28 +10,19 @@ import ( "github.com/ydb-platform/ydbops/cmd/maintenance/refresh" "github.com/ydb-platform/ydbops/pkg/cli" "github.com/ydb-platform/ydbops/pkg/cmdutil" - "github.com/ydb-platform/ydbops/pkg/command" ) -type Options struct { - *command.BaseOptions -} - func New(f cmdutil.Factory) *cobra.Command { - options := &Options{} - c := cli.SetDefaultsOn(&cobra.Command{ + cmd := cli.SetDefaultsOn(&cobra.Command{ Use: "maintenance", Short: "Request hosts from the Cluster Management System", Long: `ydbops maintenance [command]: Manage host maintenance operations: request and return hosts with performed maintenance back to the cluster.`, - PreRunE: cli.PopulateProfileDefaultsAndValidate( - options.BaseOptions, options, - ), RunE: cli.RequireSubcommand, }) - c.AddCommand( + cmd.AddCommand( complete.New(f), create.New(f), drop.New(f), @@ -39,5 +30,5 @@ func New(f cmdutil.Factory) *cobra.Command { refresh.New(f), ) - return c + return cmd } diff --git a/cmd/restart/options.go b/cmd/restart/options.go index 5e2d3c9..168cb44 100644 --- a/cmd/restart/options.go +++ b/cmd/restart/options.go @@ -20,44 +20,57 @@ func (o *Options) DefineFlags(fs *pflag.FlagSet) { o.RestartOptions.DefineFlags(fs) } -func (o *Options) Run(f cmdutil.Factory) error { - var storageRestarter restarters.Restarter - var tenantRestarter restarters.Restarter - - if o.RestartOptions.KubeconfigPath != "" { - storageRestarter = restarters.NewStorageK8sRestarter( +func PrepareRestarters( + opts *options.TargetingOptions, + sshArgs []string, + customSystemdUnitName string, + restartDuration int, +) (storage, tenant restarters.Restarter) { + if opts.KubeconfigPath != "" { + storage = restarters.NewStorageK8sRestarter( options.Logger, &restarters.StorageK8sRestarterOptions{ K8sRestarterOptions: &restarters.K8sRestarterOptions{ - KubeconfigPath: o.RestartOptions.KubeconfigPath, - Namespace: o.RestartOptions.K8sNamespace, - RestartDuration: time.Duration(o.RestartOptions.RestartDuration) * time.Second, + KubeconfigPath: opts.KubeconfigPath, + Namespace: opts.K8sNamespace, + RestartDuration: time.Duration(restartDuration) * time.Second, }, }, ) - tenantRestarter = restarters.NewTenantK8sRestarter( + tenant = restarters.NewTenantK8sRestarter( options.Logger, &restarters.TenantK8sRestarterOptions{ K8sRestarterOptions: &restarters.K8sRestarterOptions{ - KubeconfigPath: o.RestartOptions.KubeconfigPath, - Namespace: o.RestartOptions.K8sNamespace, - RestartDuration: time.Duration(o.RestartOptions.RestartDuration) * time.Second, + KubeconfigPath: opts.KubeconfigPath, + Namespace: opts.K8sNamespace, + RestartDuration: time.Duration(restartDuration) * time.Second, }, }, ) - } else { - storageRestarter = restarters.NewStorageSSHRestarter( - options.Logger, - o.RestartOptions.SSHArgs, - o.RestartOptions.CustomSystemdUnitName, - ) - tenantRestarter = restarters.NewTenantSSHRestarter( - options.Logger, - o.RestartOptions.SSHArgs, - o.RestartOptions.CustomSystemdUnitName, - ) + return storage, tenant } + storage = restarters.NewStorageSSHRestarter( + options.Logger, + sshArgs, + customSystemdUnitName, + ) + tenant = restarters.NewTenantSSHRestarter( + options.Logger, + sshArgs, + customSystemdUnitName, + ) + return storage, tenant +} + +func (o *Options) Run(f cmdutil.Factory) error { + storageRestarter, tenantRestarter := PrepareRestarters( + &o.TargetingOptions, + o.SSHArgs, + o.CustomSystemdUnitName, + o.RestartDuration, + ) + bothUnspecified := !o.RestartOptions.Storage && !o.RestartOptions.Tenant var executer rolling.Executer diff --git a/pkg/client/cms/cms.go b/pkg/client/cms/cms.go index 44a1db1..6836b92 100644 --- a/pkg/client/cms/cms.go +++ b/pkg/client/cms/cms.go @@ -197,6 +197,7 @@ func (c *defaultCMSClient) CreateMaintenanceTask(params MaintenanceTaskParams) ( }, } + fmt.Println(params.Duration) if params.ScopeType == NodeScope { request.ActionGroups = actionGroupsFromNodes(params) } else { // HostScope diff --git a/pkg/client/cms/maintenance.go b/pkg/client/cms/maintenance.go index 6d669b5..ae7ed4c 100644 --- a/pkg/client/cms/maintenance.go +++ b/pkg/client/cms/maintenance.go @@ -8,6 +8,7 @@ import ( "github.com/ydb-platform/ydb-go-genproto/draft/protos/Ydb_Maintenance" "github.com/ydb-platform/ydbops/pkg/client" + "github.com/ydb-platform/ydbops/pkg/utils" ) const ( @@ -34,35 +35,71 @@ type Maintenance interface { } // CompleteActions implements Client. -func (d *defaultCMSClient) CompleteActions(taskID string, hostFQDNs []string) (*Ydb_Maintenance.ManageActionResult, error) { +func (d *defaultCMSClient) CompleteActions(taskID string, hosts []string) (*Ydb_Maintenance.ManageActionResult, error) { task, err := d.GetMaintenanceTask(taskID) if err != nil { return nil, fmt.Errorf("failed to get maintenance task %v: %w", taskID, err) } - hostToActionUID := make(map[string]*Ydb_Maintenance.ActionUid) + nodeIDs, errIds := utils.GetNodeIds(hosts) + hostFQDNs, errFqdns := utils.GetNodeFQDNs(hosts) + + if errIds != nil && errFqdns != nil { + return nil, fmt.Errorf( + "failed to parse --hosts argument as node ids (%w) or host fqdns (%w)", + errIds, + errFqdns, + ) + } + + hostFQDNToActionUID := make(map[string]*Ydb_Maintenance.ActionUid) + nodeIDToActionUID := make(map[uint32]*Ydb_Maintenance.ActionUid) for _, gs := range task.GetActionGroupStates() { as := gs.ActionStates[0] scope := as.Action.GetLockAction().Scope - host := scope.GetHost() - if host == "" { - return nil, fmt.Errorf("Trying to complete an action with nodeId scope, currently unimplemented") + + hostFqdn := scope.GetHost() + nodeID := scope.GetNodeId() + switch { + case hostFqdn != "": + hostFQDNToActionUID[hostFqdn] = as.ActionUid + case nodeID != 0: + nodeIDToActionUID[nodeID] = as.ActionUid + default: + return nil, fmt.Errorf( + "failed to complete action. An action's scope didn't contain host or nodeID: %+v. Contact the developers", + scope, + ) } + } - hostToActionUID[host] = as.ActionUid + var finishedActions []*Ydb_Maintenance.ActionUid + + if errIds == nil { + finishedActions, err = getFinishedActions(nodeIDs, nodeIDToActionUID) + } else { + finishedActions, err = getFinishedActions(hostFQDNs, hostFQDNToActionUID) } - completedActions := []*Ydb_Maintenance.ActionUid{} - for _, host := range hostFQDNs { - actionUid, present := hostToActionUID[host] + if err != nil { + return nil, err + } + + return d.CompleteAction(finishedActions) +} + +func getFinishedActions[T uint32 | string](nodes []T, nodeToActionUID map[T]*Ydb_Maintenance.ActionUid) ([]*Ydb_Maintenance.ActionUid, error) { + finishedActions := []*Ydb_Maintenance.ActionUid{} + for _, host := range nodes { + actionUID, present := nodeToActionUID[host] if !present { - return nil, fmt.Errorf("Failed to complete host %s, corresponding CMS action not found.\n"+ + return nil, fmt.Errorf("failed to complete host %v, corresponding CMS action not found.\n"+ "This host either was never requested or already completed", host) } - completedActions = append(completedActions, actionUid) + finishedActions = append(finishedActions, actionUID) } - return d.CompleteAction(completedActions) + return finishedActions, nil } func (d *defaultCMSClient) queryEachTaskForActions(taskIds []string) ([]MaintenanceTask, error) { diff --git a/pkg/maintenance/host.go b/pkg/maintenance/host.go deleted file mode 100644 index 255536e..0000000 --- a/pkg/maintenance/host.go +++ /dev/null @@ -1,63 +0,0 @@ -package maintenance - -import ( - "fmt" - - "github.com/google/uuid" - "github.com/ydb-platform/ydb-go-genproto/draft/protos/Ydb_Maintenance" - "google.golang.org/protobuf/types/known/durationpb" - - "github.com/ydb-platform/ydbops/pkg/client/cms" -) - -const ( - MaintenanceTaskPrefix = "maintenance-" -) - -func getNodesOnHost(cmsClient cms.Client, hostFQDN string) ([]*Ydb_Maintenance.Node, error) { - nodes, err := cmsClient.Nodes() - if err != nil { - return nil, err - } - - res := []*Ydb_Maintenance.Node{} - - for _, node := range nodes { - // TODO here is the non-trivial part with Kubernetes, surgically create a shared logic - // with Kubernetes restarters - if node.Host == hostFQDN { - res = append(res, node) - } - } - - return res, nil -} - -type RequestHostParams struct { - AvailabilityMode Ydb_Maintenance.AvailabilityMode - HostFQDN string - MaintenanceDuration *durationpb.Duration -} - -func RequestHost(cmsClient cms.Client, params *RequestHostParams) (string, error) { - taskUID := MaintenanceTaskPrefix + uuid.New().String() - - nodes, err := getNodesOnHost(cmsClient, params.HostFQDN) - if err != nil { - return "", err - } - - taskParams := cms.MaintenanceTaskParams{ - TaskUID: taskUID, - AvailabilityMode: params.AvailabilityMode, - Duration: params.MaintenanceDuration, - Nodes: nodes, - } - - task, err := cmsClient.CreateMaintenanceTask(taskParams) - if err != nil { - return "", fmt.Errorf("failed to create maintenance task: %w", err) - } - - return task.GetTaskUid(), nil -} diff --git a/pkg/options/targeting.go b/pkg/options/targeting.go new file mode 100644 index 0000000..4da49a4 --- /dev/null +++ b/pkg/options/targeting.go @@ -0,0 +1,198 @@ +package options + +import ( + "fmt" + "regexp" + "strconv" + "strings" + "time" + "unicode/utf8" + + "github.com/spf13/pflag" + "github.com/ydb-platform/ydb-go-genproto/draft/protos/Ydb_Maintenance" + + "github.com/ydb-platform/ydbops/internal/collections" + "github.com/ydb-platform/ydbops/pkg/profile" + "github.com/ydb-platform/ydbops/pkg/utils" +) + +const ( + DefaultMaxStaticNodeID = 50000 +) + +var ( + startedUnparsedFlag string + versionUnparsedFlag string +) + +var ( + majorMinorPatchPattern = `^(>|<|!=|~=)(\d+|\*)\.(\d+|\*)\.(\d+|\*)$` + majorMinorPatchRegexp = regexp.MustCompile(majorMinorPatchPattern) + + rawPattern = `^(==|!=)(.*)$` + rawRegexp = regexp.MustCompile(rawPattern) +) + +type TargetingOptions struct { + AvailabilityMode string + Datacenters []string + Hosts []string + ExcludeHosts []string + Version string + + StartedTime *StartedTime + VersionSpec VersionSpec + + Storage bool + Tenant bool + TenantList []string + + KubeconfigPath string + K8sNamespace string + + MaxStaticNodeID int +} + +func (o *TargetingOptions) DefineFlags(fs *pflag.FlagSet) { + fs.BoolVar(&o.Storage, "storage", false, `Only include storage nodes. Otherwise, include all nodes by default`) + + fs.BoolVar(&o.Tenant, "tenant", false, `Only include tenant nodes. Otherwise, include all nodes by default`) + + fs.StringSliceVar(&o.TenantList, "tenant-list", []string{}, `Comma-delimited list of tenant names to restart. + E.g.:'--tenant-list=name1,name2,name3'`) + + fs.StringSliceVar(&o.Datacenters, "dc", []string{}, + `Filter hosts by specific datacenter. The list is comma-delimited. + E.g.: '--dc=ru-central1-a,ru-central1-b`) + + fs.StringSliceVar(&o.Hosts, "hosts", []string{}, + `Restart only specified hosts. You can specify a list of host FQDNs or a list of node ids, +but you can not mix host FQDNs and node ids in this option. The list is comma-delimited. + E.g.: '--hosts=1,2,3' or '--hosts=fqdn1,fqdn2,fqdn3'`) + + fs.StringSliceVar(&o.ExcludeHosts, "exclude-hosts", []string{}, + `Comma-delimited list. Do not restart these hosts, even if they are explicitly specified in --hosts.`) + + fs.StringVar(&o.AvailabilityMode, "availability-mode", "strong", + fmt.Sprintf("Availability mode. Available choices: %s", strings.Join(AvailabilityModes, ", "))) + + fs.StringVar(&startedUnparsedFlag, "started", "", + fmt.Sprintf(`Apply filter by node started time. +Format: "<>%%Y-%%m-%%dT%%H:%%M:%%SZ", quotes are necessary, otherwise shell treats '<' or '>' as stream redirection. +For example, --started ">2024-03-13T17:20:06Z" means all nodes started LATER than 2024 March 13, 17:20:06 UTC. +If you reverse the sign (--started ">2024-03-13T17:20:06Z"), you will select nodes with LARGER uptimes.`)) + + fs.StringVar(&versionUnparsedFlag, "version", "", + `Apply filter by node version. +Format: [(<|>|!=|~=)MAJOR.MINOR.PATCH|(==|!=)VERSION_STRING], e.g.: +'--version ~=24.1.2' or +'--version !=24.1.2-ydb-stable-hotfix-5'`) + + fs.IntVar(&o.MaxStaticNodeID, "max-static-node-id", DefaultMaxStaticNodeID, + `This argument is used to help ydbops distinguish storage and dynamic nodes. +Nodes with this nodeId or less will be considered storage.`) + + profile.PopulateFromProfileLater( + fs.StringVar, &o.KubeconfigPath, "kubeconfig", + "", + "[can specify in profile] Path to kubeconfig file.") + + profile.PopulateFromProfileLater( + fs.StringVar, &o.K8sNamespace, "k8s-namespace", + "", + "[can specify in profile] Limit your operations to pods in this kubernetes namespace.") +} + +func (o *TargetingOptions) Validate() error { + if !collections.Contains(AvailabilityModes, o.AvailabilityMode) { + return fmt.Errorf("specified a non-existing availability mode: %s", o.AvailabilityMode) + } + + if len(o.KubeconfigPath) > 0 && len(o.K8sNamespace) == 0 { + return fmt.Errorf("specified --kubeconfig, but not --k8s-namespace") + } + + if o.MaxStaticNodeID < 0 { + return fmt.Errorf("specified invalid max-static-node-id: %d. Must be positive", o.MaxStaticNodeID) + } + + if len(o.TenantList) > 0 && !o.Tenant { + return fmt.Errorf("--tenant-list specified, but --tenant is not explicitly specified." + + "Please specify --tenant as well to clearly indicate your intentions") + } + + if startedUnparsedFlag != "" { + directionRune, _ := utf8.DecodeRuneInString(startedUnparsedFlag) + if directionRune != '<' && directionRune != '>' { + return fmt.Errorf("the first character of --started value should be < or >") + } + + timestampString, _ := strings.CutPrefix(startedUnparsedFlag, string(directionRune)) + timestamp, err := time.Parse(time.RFC3339, timestampString) + if err != nil { + return fmt.Errorf("failed to parse --started: %w", err) + } + + o.StartedTime = &StartedTime{ + Timestamp: timestamp, + Direction: directionRune, + } + } + + if versionUnparsedFlag != "" { + var err error + o.VersionSpec, err = parseVersionFlag(versionUnparsedFlag) + if err != nil { + return err + } + } + + _, errFromIds := utils.GetNodeIds(o.Hosts) + _, errFromFQDNs := utils.GetNodeFQDNs(o.Hosts) + if errFromIds != nil && errFromFQDNs != nil { + return fmt.Errorf( + "failed to parse --hosts argument as node ids (%w) or host fqdns (%w)", + errFromIds, + errFromFQDNs, + ) + } + + return nil +} + +func parseVersionFlag(versionUnparsedFlag string) (VersionSpec, error) { + matches := majorMinorPatchRegexp.FindStringSubmatch(versionUnparsedFlag) + if len(matches) == 5 { + // `--version` value looks like (sign)major.minor.patch + major, _ := strconv.Atoi(matches[2]) + minor, _ := strconv.Atoi(matches[3]) + patch, _ := strconv.Atoi(matches[4]) + return &MajorMinorPatchVersion{ + Sign: matches[1], + Major: major, + Minor: minor, + Patch: patch, + }, nil + } + + matches = rawRegexp.FindStringSubmatch(versionUnparsedFlag) + if len(matches) == 3 { + // `--version` value is an arbitrary string value, and will + // be compared directly + return &RawVersion{ + Sign: matches[1], + Raw: matches[2], + }, nil + } + + return nil, fmt.Errorf( + "failed to interpret the value of `--version` flag. Read `ydbops restart --help` for more info on what is expected", + ) +} + +func (o *TargetingOptions) GetAvailabilityMode() Ydb_Maintenance.AvailabilityMode { + title := strings.ToUpper(fmt.Sprintf("availability_mode_%s", o.AvailabilityMode)) + value := Ydb_Maintenance.AvailabilityMode_value[title] + + return Ydb_Maintenance.AvailabilityMode(value) +} diff --git a/pkg/profile/profile.go b/pkg/profile/profile.go index e4ef12f..7d86807 100644 --- a/pkg/profile/profile.go +++ b/pkg/profile/profile.go @@ -12,11 +12,17 @@ type option struct { defaultValue string } -// TODO make a generic solution, at this moment only string values are supported -var pointersToProgramOptions = make(map[string]option) +// TODO @jorres make a generic solution, at this moment only string values are supported + +// NOTE: the whole profile solution is probably a hack, and there must be a better way of doing it. +// Currently, this map, for every key that is supported by a profile (e.g. 'kubeconfig'), holds +// pointers to all string variables for this key across all commands (e.g. for 'kubeconfig' profile +// option, there are multiple commands which have RestartOptions and thus need filling +// KubeconfigPath in profile: run, maintenance, restart). +var pointersToProgramOptions = make(map[string][]option) const ( - activeProfileKeyName = "active_profile" + activeProfileKeyName = "current-profile" profileArrayKeyName = "profiles" ) @@ -60,19 +66,32 @@ func FillDefaultsFromActiveProfile(configFile, profileName string) error { return fmt.Errorf("profile `%s` not found in your profile file", profileName) } - for optionName, defValue := range profile.(map[any]any) { - option, ok := pointersToProgramOptions[optionName.(string)] + for optionName, valueFromFile := range profile.(map[any]any) { + options, ok := pointersToProgramOptions[optionName.(string)] if !ok { return fmt.Errorf("profile `%s` contains unsupported field `%s`", profileName, optionName) } - if *option.ptr == option.defaultValue { - *option.ptr = defValue.(string) + for _, option := range options { + if *option.ptr == option.defaultValue { + *option.ptr = valueFromFile.(string) + } } } return nil } +func appendToOptions(flagName string, ptr *string, defaultValue string) { + if pointersToProgramOptions[flagName] == nil { + pointersToProgramOptions[flagName] = []option{} + } + + pointersToProgramOptions[flagName] = append(pointersToProgramOptions[flagName], option{ + ptr: ptr, + defaultValue: defaultValue, + }) +} + func PopulateFromProfileLaterP( setter func(*string, string, string, string, string), ptr *string, @@ -81,10 +100,7 @@ func PopulateFromProfileLaterP( defaultValue string, usage string, ) { - pointersToProgramOptions[flagName] = option{ - ptr: ptr, - defaultValue: defaultValue, - } + appendToOptions(flagName, ptr, defaultValue) setter(ptr, flagName, shorthand, defaultValue, usage) } @@ -95,9 +111,6 @@ func PopulateFromProfileLater( defaultValue string, usage string, ) { - pointersToProgramOptions[flagName] = option{ - ptr: ptr, - defaultValue: defaultValue, - } + appendToOptions(flagName, ptr, defaultValue) setter(ptr, flagName, defaultValue, usage) } diff --git a/pkg/rolling/options.go b/pkg/rolling/options.go index fc4afe0..2804a43 100644 --- a/pkg/rolling/options.go +++ b/pkg/rolling/options.go @@ -2,151 +2,64 @@ package rolling import ( "fmt" - "regexp" - "strconv" - "strings" "time" - "unicode/utf8" "github.com/spf13/pflag" - "github.com/ydb-platform/ydb-go-genproto/draft/protos/Ydb_Maintenance" "google.golang.org/protobuf/types/known/durationpb" - "github.com/ydb-platform/ydbops/internal/collections" "github.com/ydb-platform/ydbops/pkg/options" - "github.com/ydb-platform/ydbops/pkg/profile" - "github.com/ydb-platform/ydbops/pkg/rolling/restarters" "github.com/ydb-platform/ydbops/pkg/utils" ) const ( DefaultRetryCount = 3 - DefaultRestartDurationSeconds = 60 DefaultCMSQueryIntervalSeconds = 10 -) - -var ( - majorMinorPatchPattern = `^(>|<|!=|~=)(\d+|\*)\.(\d+|\*)\.(\d+|\*)$` - majorMinorPatchRegexp = regexp.MustCompile(majorMinorPatchPattern) - - rawPattern = `^(==|!=)(.*)$` - rawRegexp = regexp.MustCompile(rawPattern) + DefaultRestartDurationSeconds = 60 ) type RestartOptions struct { - AvailabilityMode string - Datacenters []string - Hosts []string - ExcludeHosts []string - RestartDuration int + options.TargetingOptions + RestartRetryNumber int - Version string CMSQueryInterval int SuppressCompatibilityCheck bool - StartedTime *options.StartedTime - VersionSpec options.VersionSpec + RestartDuration int Continue bool - Storage bool - Tenant bool - TenantList []string - SSHArgs []string CustomSystemdUnitName string - - KubeconfigPath string - K8sNamespace string - - MaxStaticNodeId int } -var ( - startedUnparsedFlag string - versionUnparsedFlag string - rawSSHUnparsedArgs string -) +var rawSSHUnparsedArgs string func (o *RestartOptions) Validate() error { - if !collections.Contains(options.AvailabilityModes, o.AvailabilityMode) { - return fmt.Errorf("specified a non-existing availability mode: %s", o.AvailabilityMode) - } - - if len(o.KubeconfigPath) > 0 && len(o.K8sNamespace) == 0 { - return fmt.Errorf("specified --kubeconfig, but not --k8s-namespace") - } - - if o.MaxStaticNodeId < 0 { - return fmt.Errorf("specified invalid max-static-node-id: %d. Must be positive", o.MaxStaticNodeId) - } - - if o.RestartDuration < 0 { - return fmt.Errorf("specified invalid restart duration seconds: %d. Must be positive", o.RestartDuration) + err := o.TargetingOptions.Validate() + if err != nil { + return err } if o.CMSQueryInterval < 0 { - return fmt.Errorf("specified invalid cms query interval seconds: %d. Must be positive", o.RestartDuration) + return fmt.Errorf("specified invalid cms query interval seconds: %d. Must be positive", o.CMSQueryInterval) } if o.RestartRetryNumber < 0 { return fmt.Errorf("specified invalid restart retry number: %d. Must be positive", o.RestartRetryNumber) } - if len(o.TenantList) > 0 && !o.Tenant { - return fmt.Errorf("--tenant-list specified, but --tenant is not explicitly specified." + - "Please specify --tenant as well to clearly indicate your intentions.") - } - - if startedUnparsedFlag != "" { - directionRune, _ := utf8.DecodeRuneInString(startedUnparsedFlag) - if directionRune != '<' && directionRune != '>' { - return fmt.Errorf("the first character of --started value should be < or >") - } - - timestampString, _ := strings.CutPrefix(startedUnparsedFlag, string(directionRune)) - timestamp, err := time.Parse(time.RFC3339, timestampString) - if err != nil { - return fmt.Errorf("failed to parse --started: %w", err) - } - - o.StartedTime = &options.StartedTime{ - Timestamp: timestamp, - Direction: directionRune, - } - } - - if versionUnparsedFlag != "" { - var err error - o.VersionSpec, err = parseVersionFlag(versionUnparsedFlag) - if err != nil { - return err - } + if o.RestartDuration < 0 { + return fmt.Errorf("specified invalid restart duration: %d. Must be positive", o.RestartDuration) } o.SSHArgs = utils.ParseSSHArgs(rawSSHUnparsedArgs) - _, errFromIds := utils.GetNodeIds(o.Hosts) - _, errFromFQDNs := utils.GetNodeFQDNs(o.Hosts) - if errFromIds != nil && errFromFQDNs != nil { - return fmt.Errorf( - "failed to parse --hosts argument as node ids (%w) or host fqdns (%w)", - errFromIds, - errFromFQDNs, - ) - } - return nil } func (o *RestartOptions) DefineFlags(fs *pflag.FlagSet) { - fs.BoolVar(&o.Storage, "storage", false, `Only include storage nodes. Otherwise, include all nodes by default`) - - fs.BoolVar(&o.Tenant, "tenant", false, `Only include tenant nodes. Otherwise, include all nodes by default`) - - fs.StringSliceVar(&o.TenantList, "tenant-list", []string{}, `Comma-delimited list of tenant names to restart. - E.g.:'--tenant-list=name1,name2,name3'`) + o.TargetingOptions.DefineFlags(fs) fs.StringVar(&o.CustomSystemdUnitName, "systemd-unit", "", "Specify custom systemd unit name to restart") @@ -158,104 +71,26 @@ Examples: 1) --ssh-args "pssh -A -J --yc-profile " 2) --ssh-args "ssh -o ProxyCommand=\"...\""`) - fs.StringSliceVar(&o.Datacenters, "dc", []string{}, - `Filter hosts by specific datacenter. The list is comma-delimited. - E.g.: '--dc=ru-central1-a,ru-central1-b`) - - fs.StringSliceVar(&o.Hosts, "hosts", []string{}, - `Restart only specified hosts. You can specify a list of host FQDNs or a list of node ids, -but you can not mix host FQDNs and node ids in this option. The list is comma-delimited. - E.g.: '--hosts=1,2,3' or '--hosts=fqdn1,fqdn2,fqdn3'`) - - fs.StringSliceVar(&o.ExcludeHosts, "exclude-hosts", []string{}, - `Comma-delimited list. Do not restart these hosts, even if they are explicitly specified in --hosts.`) - - fs.StringVar(&o.AvailabilityMode, "availability-mode", "strong", - fmt.Sprintf("Availability mode. Available choices: %s", strings.Join(options.AvailabilityModes, ", "))) - - fs.IntVar(&o.RestartDuration, "duration", DefaultRestartDurationSeconds, - `CMS will release the node for maintenance for duration * restart-retry-number seconds. Any maintenance -after that would be considered a regular cluster failure`) - fs.IntVar(&o.RestartRetryNumber, "restart-retry-number", DefaultRetryCount, fmt.Sprintf("How many times a node should be retried on error, default %v", DefaultRetryCount)) fs.IntVar(&o.CMSQueryInterval, "cms-query-interval", DefaultCMSQueryIntervalSeconds, fmt.Sprintf("How often to query CMS while waiting for new permissions %v", DefaultCMSQueryIntervalSeconds)) - fs.StringVar(&startedUnparsedFlag, "started", "", - fmt.Sprintf(`Apply filter by node started time. -Format: "<>%%Y-%%m-%%dT%%H:%%M:%%SZ", quotes are necessary, otherwise shell treats '<' or '>' as stream redirection. -For example, --started ">2024-03-13T17:20:06Z" means all nodes started LATER than 2024 March 13, 17:20:06 UTC. -If you reverse the sign (--started ">2024-03-13T17:20:06Z"), you will select nodes with LARGER uptimes.`)) - - fs.StringVar(&versionUnparsedFlag, "version", "", - `Apply filter by node version. -Format: [(<|>|!=|~=)MAJOR.MINOR.PATCH|(==|!=)VERSION_STRING], e.g.: -'--version ~=24.1.2' or -'--version !=24.1.2-ydb-stable-hotfix-5'`) - fs.BoolVar(&o.Continue, "continue", false, `Attempt to continue previous rolling restart, if there was one. The set of selected nodes for this invocation must be the same as for the previous invocation, and this can not be verified at runtime since the ydbops utility is stateless. Use at your own risk.`) - fs.IntVar(&o.MaxStaticNodeId, "max-static-node-id", restarters.DefaultMaxStaticNodeId, - `This argument is used to help ydbops distinguish storage and dynamic nodes. -Nodes with this nodeId or less will be considered storage.`) + fs.IntVar(&o.RestartDuration, "duration", DefaultRestartDurationSeconds, + `CMS will release the node for maintenance for duration * restart-retry-number seconds. Any maintenance +after that would be considered a regular cluster failure`) fs.BoolVar(&o.SuppressCompatibilityCheck, "suppress-compat-check", false, `By default, nodes within one cluster can differ by at most one major release. ydbops will try to figure out if you broke this rule by comparing before\after of some restarted node.`) - - profile.PopulateFromProfileLater( - fs.StringVar, &o.KubeconfigPath, "kubeconfig", - "", - "[can specify in profile] Path to kubeconfig file.") - - profile.PopulateFromProfileLater( - fs.StringVar, &o.K8sNamespace, "k8s-namespace", - "", - "[can specify in profile] Limit your operations to pods in this kubernetes namespace.") -} - -func (o *RestartOptions) GetAvailabilityMode() Ydb_Maintenance.AvailabilityMode { - title := strings.ToUpper(fmt.Sprintf("availability_mode_%s", o.AvailabilityMode)) - value := Ydb_Maintenance.AvailabilityMode_value[title] - - return Ydb_Maintenance.AvailabilityMode(value) } func (o *RestartOptions) GetRestartDuration() *durationpb.Duration { return durationpb.New(time.Second * time.Duration(o.RestartDuration) * time.Duration(o.RestartRetryNumber)) } - -func parseVersionFlag(versionUnparsedFlag string) (options.VersionSpec, error) { - matches := majorMinorPatchRegexp.FindStringSubmatch(versionUnparsedFlag) - if len(matches) == 5 { - // `--version` value looks like (sign)major.minor.patch - major, _ := strconv.Atoi(matches[2]) - minor, _ := strconv.Atoi(matches[3]) - patch, _ := strconv.Atoi(matches[4]) - return &options.MajorMinorPatchVersion{ - Sign: matches[1], - Major: major, - Minor: minor, - Patch: patch, - }, nil - } - - matches = rawRegexp.FindStringSubmatch(versionUnparsedFlag) - if len(matches) == 3 { - // `--version` value is an arbitrary string value, and will - // be compared directly - return &options.RawVersion{ - Sign: matches[1], - Raw: matches[2], - }, nil - } - - return nil, fmt.Errorf( - "failed to interpret the value of `--version` flag. Read `ydbops restart --help` for more info on what is expected", - ) -} diff --git a/pkg/rolling/restarters/interface.go b/pkg/rolling/restarters/interface.go index 480ea95..ebfa1bd 100644 --- a/pkg/rolling/restarters/interface.go +++ b/pkg/rolling/restarters/interface.go @@ -27,5 +27,5 @@ type FilterNodeParams struct { SelectedNodeIds []uint32 SelectedHosts []string SelectedDatacenters []string - MaxStaticNodeId uint32 + MaxStaticNodeID uint32 } diff --git a/pkg/rolling/restarters/primitives.go b/pkg/rolling/restarters/primitives.go index f4bffff..952aae8 100644 --- a/pkg/rolling/restarters/primitives.go +++ b/pkg/rolling/restarters/primitives.go @@ -14,7 +14,7 @@ import ( ) const ( - DefaultMaxStaticNodeId = 50000 + DefaultMaxStaticNodeID = 50000 ) func FilterStorageNodes(nodes []*Ydb_Maintenance.Node, maxStaticNodeId uint32) []*Ydb_Maintenance.Node { diff --git a/pkg/rolling/restarters/run.go b/pkg/rolling/restarters/run.go index 8a53a23..fe71ca3 100644 --- a/pkg/rolling/restarters/run.go +++ b/pkg/rolling/restarters/run.go @@ -67,7 +67,7 @@ func (r *RunRestarter) Filter(spec FilterNodeParams, cluster ClusterNodesInfo) [ var runScopeNodes []*Ydb_Maintenance.Node if r.storageOnly { - storageNodes := FilterStorageNodes(cluster.AllNodes, spec.MaxStaticNodeId) + storageNodes := FilterStorageNodes(cluster.AllNodes, spec.MaxStaticNodeID) runScopeNodes = PopulateByCommonFields(storageNodes, spec) } else if r.dynnodeOnly { tenantNodes := FilterTenantNodes(cluster.AllNodes) diff --git a/pkg/rolling/restarters/storage_k8s.go b/pkg/rolling/restarters/storage_k8s.go index 36742f3..4649510 100644 --- a/pkg/rolling/restarters/storage_k8s.go +++ b/pkg/rolling/restarters/storage_k8s.go @@ -62,7 +62,7 @@ func applyStorageK8sFilteringRules( cluster ClusterNodesInfo, fqdnToPodName map[string]string, ) []*Ydb_Maintenance.Node { - allStorageNodes := FilterStorageNodes(cluster.AllNodes, spec.MaxStaticNodeId) + allStorageNodes := FilterStorageNodes(cluster.AllNodes, spec.MaxStaticNodeID) selectedByCMSNodes := PopulateByCommonFields(allStorageNodes, spec) selectedByK8sNodes := populateWithK8sRules(allStorageNodes, spec, fqdnToPodName) diff --git a/pkg/rolling/restarters/storage_k8s_test.go b/pkg/rolling/restarters/storage_k8s_test.go index 032d4dc..44f9de7 100644 --- a/pkg/rolling/restarters/storage_k8s_test.go +++ b/pkg/rolling/restarters/storage_k8s_test.go @@ -19,7 +19,7 @@ var _ = Describe("Test storage k8s Filter", func() { It("k8s restarter filtering by --started>timestamp", func() { filterSpec := FilterNodeParams{ - MaxStaticNodeId: DefaultMaxStaticNodeId, + MaxStaticNodeID: DefaultMaxStaticNodeID, StartedTime: &options.StartedTime{ Direction: '<', Timestamp: fiveMinutesAgoTimestamp, @@ -66,7 +66,7 @@ var _ = Describe("Test storage k8s Filter", func() { firstDCName := "ru-central1-a" secondDCName := "ru-central1-b" filterSpec := FilterNodeParams{ - MaxStaticNodeId: DefaultMaxStaticNodeId, + MaxStaticNodeID: DefaultMaxStaticNodeID, SelectedDatacenters: []string{secondDCName}, } diff --git a/pkg/rolling/restarters/storage_ssh.go b/pkg/rolling/restarters/storage_ssh.go index 14c9970..c3fea30 100644 --- a/pkg/rolling/restarters/storage_ssh.go +++ b/pkg/rolling/restarters/storage_ssh.go @@ -38,7 +38,7 @@ func (r StorageSSHRestarter) Filter( spec FilterNodeParams, cluster ClusterNodesInfo, ) []*Ydb_Maintenance.Node { - storageNodes := FilterStorageNodes(cluster.AllNodes, spec.MaxStaticNodeId) + storageNodes := FilterStorageNodes(cluster.AllNodes, spec.MaxStaticNodeID) preSelectedNodes := PopulateByCommonFields(storageNodes, spec) diff --git a/pkg/rolling/restarters/storage_ssh_test.go b/pkg/rolling/restarters/storage_ssh_test.go index 821f4b8..0484e9a 100644 --- a/pkg/rolling/restarters/storage_ssh_test.go +++ b/pkg/rolling/restarters/storage_ssh_test.go @@ -39,7 +39,7 @@ var _ = Describe("Test storage ssh Filter", func() { nodes := mock.CreateNodesFromShortConfig(nodeGroups, nodeInfoMap) filterSpec := FilterNodeParams{ - MaxStaticNodeId: DefaultMaxStaticNodeId, + MaxStaticNodeID: DefaultMaxStaticNodeID, StartedTime: &options.StartedTime{ Direction: '<', Timestamp: fiveMinutesAgoTimestamp, @@ -90,7 +90,7 @@ var _ = Describe("Test storage ssh Filter", func() { nodes := mock.CreateNodesFromShortConfig(nodeGroups, nodeInfoMap) filterSpec := FilterNodeParams{ - MaxStaticNodeId: DefaultMaxStaticNodeId, + MaxStaticNodeID: DefaultMaxStaticNodeID, } clusterInfo := ClusterNodesInfo{ diff --git a/pkg/rolling/restarters/tenant_k8s_test.go b/pkg/rolling/restarters/tenant_k8s_test.go index f367b57..fd8dfc6 100644 --- a/pkg/rolling/restarters/tenant_k8s_test.go +++ b/pkg/rolling/restarters/tenant_k8s_test.go @@ -19,7 +19,7 @@ var _ = Describe("Test tenant k8s Filter", func() { It("k8s restarter filtering by --started>timestamp", func() { filterSpec := FilterNodeParams{ - MaxStaticNodeId: DefaultMaxStaticNodeId, + MaxStaticNodeID: DefaultMaxStaticNodeID, StartedTime: &options.StartedTime{ Direction: '<', Timestamp: fiveMinutesAgoTimestamp, diff --git a/pkg/rolling/rolling.go b/pkg/rolling/rolling.go index a6a5463..bc949de 100644 --- a/pkg/rolling/rolling.go +++ b/pkg/rolling/rolling.go @@ -138,7 +138,7 @@ func (r *Rolling) DoRestart() error { StartedTime: r.opts.StartedTime, Version: r.opts.VersionSpec, ExcludeHosts: r.opts.ExcludeHosts, - MaxStaticNodeId: uint32(r.opts.MaxStaticNodeId), + MaxStaticNodeID: uint32(r.opts.MaxStaticNodeID), }, restarters.ClusterNodesInfo{ TenantToNodeIds: r.state.tenantNameToNodeIds, @@ -311,7 +311,7 @@ func (r *Rolling) processActionGroupStates(actions []*Ydb_Maintenance.ActionGrou defer wg.Done() // TODO: drain node, but public draining api is not available yet - r.logger.Warn("DRAINING NOT IMPLEMENTED YET") + r.logger.Info("DRAINING NOT IMPLEMENTED YET") r.logger.Debugf("Restart node with id: %d", node.NodeId) if err := r.restarter.RestartNode(node); err != nil { @@ -361,21 +361,6 @@ func (r *Rolling) atomicRememberComplete(m *sync.Mutex, actionUID *Ydb_Maintenan (*restartedNodes)++ } -func (r *Rolling) populateTenantToNodesMapping(nodes []*Ydb_Maintenance.Node) map[string][]uint32 { - tenantNameToNodeIds := make(map[string][]uint32) - for _, node := range nodes { - dynamicNode := node.GetDynamic() - if dynamicNode != nil { - tenantNameToNodeIds[dynamicNode.GetTenant()] = append( - tenantNameToNodeIds[dynamicNode.GetTenant()], - node.NodeId, - ) - } - } - - return tenantNameToNodeIds -} - func (r *Rolling) prepareState() (*state, error) { nodes, err := r.cms.Nodes() @@ -408,7 +393,7 @@ func (r *Rolling) prepareState() (*state, error) { return &state{ knownVersions: make(MajorToMinors), - tenantNameToNodeIds: r.populateTenantToNodesMapping(activeNodes), + tenantNameToNodeIds: utils.PopulateTenantToNodesMapping(activeNodes), tenants: tenants, userSID: userSID, nodes: collections.ToMap(activeNodes, func(n *Ydb_Maintenance.Node) uint32 { return n.NodeId }), diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 398f37f..1258667 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -8,6 +8,7 @@ import ( "strings" "unicode" + "github.com/ydb-platform/ydb-go-genproto/draft/protos/Ydb_Maintenance" "github.com/ydb-platform/ydb-go-genproto/protos/Ydb" "github.com/ydb-platform/ydb-go-genproto/protos/Ydb_Issue" "github.com/ydb-platform/ydb-go-genproto/protos/Ydb_Operations" @@ -145,3 +146,18 @@ func ParseMajorMinorPatchFromVersion(version string) (major, minor, patch int, e return 0, 0, 0, fmt.Errorf("failed to parse the version number in any of the known patterns") } + +func PopulateTenantToNodesMapping(nodes []*Ydb_Maintenance.Node) map[string][]uint32 { + tenantNameToNodeIds := make(map[string][]uint32) + for _, node := range nodes { + dynamicNode := node.GetDynamic() + if dynamicNode != nil { + tenantNameToNodeIds[dynamicNode.GetTenant()] = append( + tenantNameToNodeIds[dynamicNode.GetTenant()], + node.NodeId, + ) + } + } + + return tenantNameToNodeIds +} diff --git a/tests/maintenance_test.go b/tests/maintenance_test.go index dabf195..ae7b41a 100644 --- a/tests/maintenance_test.go +++ b/tests/maintenance_test.go @@ -33,8 +33,8 @@ var _ = Describe("Test Maintenance", func() { "--ca-file", filepath.Join(".", "test-data", "ssl-data", "ca.crt"), "maintenance", "create", + "--duration", "180", "--availability-mode", "strong", - "--cms-query-interval", "1", "--hosts=ydb-1.ydb.tech,ydb-2.ydb.tech", }, expectedRequests: []proto.Message{ @@ -42,6 +42,7 @@ var _ = Describe("Test Maintenance", func() { User: mock.TestUser, Password: mock.TestPassword, }, + &Ydb_Maintenance.ListClusterNodesRequest{}, &Ydb_Maintenance.CreateMaintenanceTaskRequest{ TaskOptions: &Ydb_Maintenance.MaintenanceTaskOptions{ TaskUid: "task-uuid-1", @@ -64,8 +65,6 @@ var _ = Describe("Test Maintenance", func() { "--ca-file", filepath.Join(".", "test-data", "ssl-data", "ca.crt"), "maintenance", "list", - // "--availability-mode", "strong", - // "--cms-query-interval", "1", }, expectedRequests: []proto.Message{ &Ydb_Auth.LoginRequest{ @@ -99,8 +98,6 @@ var _ = Describe("Test Maintenance", func() { "--task-id", testWillInsertTaskUuid, "--hosts=ydb-1.ydb.tech", - // "--availability-mode", "strong", - // "--cms-query-interval", "1", }, expectedRequests: []proto.Message{ &Ydb_Auth.LoginRequest{ @@ -134,8 +131,6 @@ var _ = Describe("Test Maintenance", func() { "refresh", "--task-id", testWillInsertTaskUuid, - // "--availability-mode", "strong", - // "--cms-query-interval", "1", }, expectedRequests: []proto.Message{ &Ydb_Auth.LoginRequest{ @@ -163,8 +158,6 @@ var _ = Describe("Test Maintenance", func() { "--task-id", testWillInsertTaskUuid, "--hosts=ydb-2.ydb.tech", - // "--availability-mode", "strong", - // "--cms-query-interval", "1", }, expectedRequests: []proto.Message{ &Ydb_Auth.LoginRequest{ @@ -196,8 +189,194 @@ var _ = Describe("Test Maintenance", func() { "--ca-file", filepath.Join(".", "test-data", "ssl-data", "ca.crt"), "maintenance", "list", - // "--availability-mode", "strong", - // "--cms-query-interval", "1", + }, + expectedRequests: []proto.Message{ + &Ydb_Auth.LoginRequest{ + User: mock.TestUser, + Password: mock.TestPassword, + }, + &Ydb_Discovery.WhoAmIRequest{}, + &Ydb_Maintenance.ListMaintenanceTasksRequest{ + User: &mock.TestUser, + }, + }, + expectedOutputRegexps: []string{ + "There are no maintenance tasks", + }, + }, + }, + }, + ), + Entry("restart two storage hosts by specifying node ids, storage-only baremetal cluster", TestCase{ + nodeConfiguration: [][]uint32{ + {1, 2, 3, 4, 5, 6, 7, 8}, + }, + nodeInfoMap: map[uint32]mock.TestNodeInfo{}, + steps: []StepData{ + { + ydbopsInvocation: Command{ + "--endpoint", "grpcs://localhost:2135", + "--verbose", + "--user", mock.TestUser, + "--ca-file", filepath.Join(".", "test-data", "ssl-data", "ca.crt"), + "maintenance", + "create", + "--duration", "180", + "--availability-mode", "strong", + "--hosts=1,2", + }, + expectedRequests: []proto.Message{ + &Ydb_Auth.LoginRequest{ + User: mock.TestUser, + Password: mock.TestPassword, + }, + &Ydb_Maintenance.ListClusterNodesRequest{}, + &Ydb_Maintenance.CreateMaintenanceTaskRequest{ + TaskOptions: &Ydb_Maintenance.MaintenanceTaskOptions{ + TaskUid: "task-uuid-1", + Description: "Rolling restart maintenance task", + AvailabilityMode: Ydb_Maintenance.AvailabilityMode_AVAILABILITY_MODE_STRONG, + }, + ActionGroups: mock.MakeActionGroupsFromNodeIds(1, 2), + }, + }, + expectedOutputRegexps: []string{ + // Your task id is:\n\n\n\nPlease write it down for refreshing and completing the task later.\n + fmt.Sprintf("Your task id is:\n\n%s%s\n\n", cms.TaskUuidPrefix, uuidRegexpString), + }, + }, + { + ydbopsInvocation: Command{ + "--endpoint", "grpcs://localhost:2135", + "--verbose", + "--user", mock.TestUser, + "--ca-file", filepath.Join(".", "test-data", "ssl-data", "ca.crt"), + "maintenance", + "list", + }, + expectedRequests: []proto.Message{ + &Ydb_Auth.LoginRequest{ + User: mock.TestUser, + Password: mock.TestPassword, + }, + &Ydb_Discovery.WhoAmIRequest{}, + &Ydb_Maintenance.ListMaintenanceTasksRequest{ + User: &mock.TestUser, + }, + &Ydb_Maintenance.GetMaintenanceTaskRequest{ + TaskUid: "task-uuid-1", + }, + }, + expectedOutputRegexps: []string{ + fmt.Sprintf("Uid: %s%s\n", cms.TaskUuidPrefix, uuidRegexpString), + " Lock on node 1", + "PERFORMED", + " Lock on node 2", + "PENDING, (\\S+)", + }, + }, + { + ydbopsInvocation: Command{ + "--endpoint", "grpcs://localhost:2135", + "--verbose", + "--user", mock.TestUser, + "--ca-file", filepath.Join(".", "test-data", "ssl-data", "ca.crt"), + "maintenance", + "complete", + "--task-id", + testWillInsertTaskUuid, + "--hosts=1", + }, + expectedRequests: []proto.Message{ + &Ydb_Auth.LoginRequest{ + User: mock.TestUser, + Password: mock.TestPassword, + }, + &Ydb_Maintenance.GetMaintenanceTaskRequest{ + TaskUid: "task-UUID-1", + }, + &Ydb_Maintenance.CompleteActionRequest{ + ActionUids: []*Ydb_Maintenance.ActionUid{ + { + TaskUid: "task-UUID-1", + GroupId: "group-UUID-1", + ActionId: "action-UUID-1", + }, + }, + }, + }, + expectedOutputRegexps: []string{ + fmt.Sprintf(" Completed action id: %s, status: SUCCESS", uuidRegexpString), + }, + }, + { + ydbopsInvocation: Command{ + "--endpoint", "grpcs://localhost:2135", + "--verbose", + "--user", mock.TestUser, + "--ca-file", filepath.Join(".", "test-data", "ssl-data", "ca.crt"), + "maintenance", + "refresh", + "--task-id", + testWillInsertTaskUuid, + }, + expectedRequests: []proto.Message{ + &Ydb_Auth.LoginRequest{ + User: mock.TestUser, + Password: mock.TestPassword, + }, + &Ydb_Maintenance.RefreshMaintenanceTaskRequest{ + TaskUid: "task-uuid-1", + }, + }, + expectedOutputRegexps: []string{ + fmt.Sprintf("Uid: %s%s\n", cms.TaskUuidPrefix, uuidRegexpString), + " Lock on node 2", + "PERFORMED", + }, + }, + { + ydbopsInvocation: Command{ + "--endpoint", "grpcs://localhost:2135", + "--verbose", + "--user", mock.TestUser, + "--ca-file", filepath.Join(".", "test-data", "ssl-data", "ca.crt"), + "maintenance", + "complete", + "--task-id", + testWillInsertTaskUuid, + "--hosts=2", + }, + expectedRequests: []proto.Message{ + &Ydb_Auth.LoginRequest{ + User: mock.TestUser, + Password: mock.TestPassword, + }, + &Ydb_Maintenance.GetMaintenanceTaskRequest{ + TaskUid: "task-UUID-1", + }, + &Ydb_Maintenance.CompleteActionRequest{ + ActionUids: []*Ydb_Maintenance.ActionUid{ + { + TaskUid: "task-UUID-1", + GroupId: "group-UUID-1", + ActionId: "action-UUID-2", + }, + }, + }, + }, + expectedOutputRegexps: []string{ + fmt.Sprintf(" Completed action id: %s, status: SUCCESS", uuidRegexpString), + }, + }, + { + ydbopsInvocation: Command{ + "--endpoint", "grpcs://localhost:2135", + "--verbose", + "--user", mock.TestUser, + "--ca-file", filepath.Join(".", "test-data", "ssl-data", "ca.crt"), + "maintenance", + "list", }, expectedRequests: []proto.Message{ &Ydb_Auth.LoginRequest{ diff --git a/tests/mock/cms-nodes.go b/tests/mock/cms-nodes.go index b00c85c..d74c8c3 100644 --- a/tests/mock/cms-nodes.go +++ b/tests/mock/cms-nodes.go @@ -75,7 +75,7 @@ func MakeActionGroupsFromHostFQDNs(hostFQDNs ...string) []*Ydb_Maintenance.Actio Host: hostFQDN, }, }, - Duration: durationpb.New(1 * time.Hour), + Duration: durationpb.New(180 * time.Second), }, }, }, diff --git a/tests/profile_test.go b/tests/profile_test.go index 0e3025f..7dea4c6 100644 --- a/tests/profile_test.go +++ b/tests/profile_test.go @@ -18,7 +18,7 @@ var _ = Describe("Test Profile", func() { AfterEach(RunAfterEach) DescribeTable("profile", RunTestCase, - Entry("some basic options, no --profile option, active_profile in config", TestCase{ + Entry("some basic options, no --profile option, current-profile in config", TestCase{ nodeConfiguration: [][]uint32{ {1, 2, 3, 4, 5, 6, 7, 8}, }, @@ -27,7 +27,7 @@ var _ = Describe("Test Profile", func() { { ydbopsInvocation: Command{ "--profile-file", - filepath.Join(".", "test-data", "config_with_active_profile.yaml"), + filepath.Join(".", "test-data", "config_with_current_profile.yaml"), "--availability-mode", "strong", "--cms-query-interval", "1", "run", @@ -95,7 +95,7 @@ var _ = Describe("Test Profile", func() { }, }, ), - Entry("some basic options, --profile option specified, no active_profile in config", TestCase{ + Entry("some basic options, --profile option specified, no current-profile in config", TestCase{ nodeConfiguration: [][]uint32{ {1, 2, 3, 4, 5, 6, 7, 8}, }, @@ -104,7 +104,7 @@ var _ = Describe("Test Profile", func() { { ydbopsInvocation: Command{ "--profile-file", - filepath.Join(".", "test-data", "config_without_active_profile.yaml"), + filepath.Join(".", "test-data", "config_without_current_profile.yaml"), "--profile", "my-profile", "--availability-mode", "strong", diff --git a/tests/test-data/config_with_active_profile.yaml b/tests/test-data/config_with_current_profile.yaml similarity index 81% rename from tests/test-data/config_with_active_profile.yaml rename to tests/test-data/config_with_current_profile.yaml index 596d95f..b0bdf57 100644 --- a/tests/test-data/config_with_active_profile.yaml +++ b/tests/test-data/config_with_current_profile.yaml @@ -1,4 +1,4 @@ -active_profile: my-profile +current-profile: my-profile profiles: my-profile: endpoint: grpcs://localhost:2135 diff --git a/tests/test-data/config_without_active_profile.yaml b/tests/test-data/config_without_current_profile.yaml similarity index 100% rename from tests/test-data/config_without_active_profile.yaml rename to tests/test-data/config_without_current_profile.yaml