Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor maintenance options #44

Merged
merged 7 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changes/unreleased/Changed-20241119-223336.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
kind: Changed
body: Active profile now uses 'current-profile' key in yaml config, rather than 'active_profile', to comply with the docs
time: 2024-11-19T22:33:36.858984953+01:00
3 changes: 3 additions & 0 deletions .changes/unreleased/Fixed-20241119-223223.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
kind: Fixed
body: '''ydbops maintenance'' command could not accept nodeIds in ''--hosts'' option (e.g. --hosts=1,2)'
time: 2024-11-19T22:32:23.277140842+01:00
3 changes: 3 additions & 0 deletions .changes/unreleased/Fixed-20241119-223739.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
kind: Fixed
body: '''ydbops maintenance'' subtree should now properly use filters such as ''started'', ''version'' etc.'
time: 2024-11-19T22:37:39.439538408+01:00
2 changes: 1 addition & 1 deletion .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
uses: golangci/golangci-lint-action@v6
with:
version: v1.61.0
args: --verbose --disable-all --new-from-rev=origin/master --enable wrapcheck,stylecheck,funlen,mnd,cyclop
args: --verbose --disable-all --new-from-rev=origin/master --enable stylecheck,funlen

tests:
name: run tests
Expand Down
14 changes: 8 additions & 6 deletions cmd/maintenance/complete/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,22 @@ import (
)

type Options struct {
TaskID string
HostFQDNs []string
TaskID string
Hosts []string
}

func (o *Options) DefineFlags(fs *pflag.FlagSet) {
fs.StringSliceVar(&o.HostFQDNs, "hosts", []string{},
"FQDNs of hosts with completed maintenance")
fs.StringSliceVar(&o.Hosts, "hosts", []string{},
`FQDNs or nodeIds of hosts with completed maintenance. You can specify a list of host FQDNs or a list of node ids,
but you can not mix host FQDNs and node ids in this option. The list is comma-delimited.
E.g.: '--hosts=1,2,3' or '--hosts=fqdn1,fqdn2,fqdn3'`)
fs.StringVar(&o.TaskID, "task-id", "",
"ID of your maintenance task (result of `ydbops maintenance host`)")
}

func (o *Options) Validate() error {
// TODO(shmel1k@): remove copypaste between drop, create & refresh methods.
if len(o.HostFQDNs) == 0 {
if len(o.Hosts) == 0 {
return fmt.Errorf("--hosts unspecified")
}
if o.TaskID == "" {
Expand All @@ -33,7 +35,7 @@ func (o *Options) Validate() error {
}

func (o *Options) Run(f cmdutil.Factory) error {
result, err := f.GetCMSClient().CompleteActions(o.TaskID, o.HostFQDNs)
result, err := f.GetCMSClient().CompleteActions(o.TaskID, o.Hosts)
if err != nil {
return err
}
Expand Down
5 changes: 1 addition & 4 deletions cmd/maintenance/create/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,10 @@ import (

"github.com/ydb-platform/ydbops/pkg/cli"
"github.com/ydb-platform/ydbops/pkg/cmdutil"
"github.com/ydb-platform/ydbops/pkg/rolling"
)

func New(f cmdutil.Factory) *cobra.Command {
opts := &Options{
RestartOptions: &rolling.RestartOptions{},
}
opts := &Options{}

cmd := cli.SetDefaultsOn(&cobra.Command{
Use: "create",
Expand Down
116 changes: 103 additions & 13 deletions cmd/maintenance/create/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,41 +8,131 @@ import (
"github.com/spf13/pflag"
"google.golang.org/protobuf/types/known/durationpb"

"github.com/ydb-platform/ydb-go-genproto/draft/protos/Ydb_Maintenance"

"github.com/ydb-platform/ydbops/cmd/restart"
"github.com/ydb-platform/ydbops/pkg/client/cms"
"github.com/ydb-platform/ydbops/pkg/cmdutil"
"github.com/ydb-platform/ydbops/pkg/rolling"
"github.com/ydb-platform/ydbops/pkg/options"
"github.com/ydb-platform/ydbops/pkg/prettyprint"
"github.com/ydb-platform/ydbops/pkg/rolling/restarters"
"github.com/ydb-platform/ydbops/pkg/utils"
)

type Options struct {
*rolling.RestartOptions
options.FilteringOptions

MaintenanceDuration int
}

// DefaultMaintenanceDurationSeconds is the value the --duration flag falls
// back to when the user does not set it. It is expressed in seconds.
const DefaultMaintenanceDurationSeconds = 60 * 60 // one hour

func (o *Options) DefineFlags(fs *pflag.FlagSet) {
o.RestartOptions.DefineFlags(fs)
o.FilteringOptions.DefineFlags(fs)

fs.IntVar(&o.MaintenanceDuration, "duration", DefaultMaintenanceDurationSeconds,
`CMS will release the node for maintenance for duration seconds. Any maintenance
after that would be considered a regular cluster failure`)
}

// Validate checks the maintenance duration and then delegates to the
// validation of the embedded FilteringOptions.
//
// NOTE(review): this span comes from a diff view. The first return below
// (o.RestartOptions.Validate) looks like the removed side of the change and
// the lines after it like the replacement — confirm against the real file.
func (o *Options) Validate() error {
return o.RestartOptions.Validate()
// NOTE(review): the condition only rejects negative values, so a duration
// of 0 passes even though the message says "Must be positive" — confirm
// whether the check should be <= 0 or the message should say non-negative.
if o.MaintenanceDuration < 0 {
return fmt.Errorf("specified invalid maintenance duration: %d. Must be positive", o.MaintenanceDuration)
}

return o.FilteringOptions.Validate()
}

// nodeIdsToNodes selects, out of all cluster nodes, the ones targeted by this
// command. It runs the storage restarter's Filter and then the tenant
// restarter's Filter over the same parameters and returns both results
// concatenated (storage first). The filter parameters are taken from the
// command's filtering options plus the explicitly requested nodeIds; the host
// selection is left empty.
//
// The returned slice is never nil, even when nothing matches.
func (o *Options) nodeIdsToNodes(
	nodes []*Ydb_Maintenance.Node,
	nodeIds []uint32,
) []*Ydb_Maintenance.Node {
	// TODO @jorres the arguments passed to PrepareRestarters are a dirty hack.
	// Only the Filter part of the restarters is needed here. The second and
	// third arguments (ssh args and systemd unit name) exist so that
	// PrepareRestarters can build restarters able to actually restart nodes;
	// within this function the restarters are used solely for filtering, so
	// those values are irrelevant. Extracting something like a 'Filterer'
	// interface out of Restarter should remove the need for this.
	storage, tenant := restart.PrepareRestarters(
		&o.FilteringOptions,
		[]string{},
		"",
		o.MaintenanceDuration,
	)

	params := restarters.FilterNodeParams{
		Version:             o.VersionSpec,
		SelectedTenants:     o.TenantList,
		SelectedNodeIds:     nodeIds,
		SelectedHosts:       []string{},
		SelectedDatacenters: o.Datacenters,
		StartedTime:         o.StartedTime,
		ExcludeHosts:        o.ExcludeHosts,
		MaxStaticNodeID:     uint32(o.MaxStaticNodeID),
	}

	cluster := restarters.ClusterNodesInfo{
		AllNodes:        nodes,
		TenantToNodeIds: utils.PopulateTenantToNodesMapping(nodes),
	}

	// Storage nodes go first, tenant nodes second.
	selected := []*Ydb_Maintenance.Node{}
	for _, r := range []restarters.Restarter{storage, tenant} {
		selected = append(selected, r.Filter(params, cluster)...)
	}

	return selected
}

func (o *Options) Run(f cmdutil.Factory) error {
taskUID := cms.TaskUuidPrefix + uuid.New().String()
duration := time.Duration(o.RestartOptions.RestartDuration) * time.Minute
taskId, err := f.GetCMSClient().CreateMaintenanceTask(cms.MaintenanceTaskParams{
Hosts: o.RestartOptions.Hosts,
Duration: durationpb.New(duration),
AvailabilityMode: o.RestartOptions.GetAvailabilityMode(),
ScopeType: cms.HostScope,
TaskUID: taskUID,
})
duration := time.Duration(o.MaintenanceDuration) * time.Second

nodes, err := f.GetCMSClient().Nodes()
if err != nil {
return err
}
nodeIds, errIds := utils.GetNodeIds(o.Hosts)
hostFQDNs, errFqdns := utils.GetNodeFQDNs(o.Hosts)
if errIds != nil && errFqdns != nil {
return fmt.Errorf(
"failed to parse --hosts argument as node ids (%w) or host fqdns (%w)",
errIds,
errFqdns,
)
}

var task cms.MaintenanceTask
if errIds == nil {
task, err = f.GetCMSClient().CreateMaintenanceTask(cms.MaintenanceTaskParams{
Nodes: o.nodeIdsToNodes(nodes, nodeIds),
Duration: durationpb.New(duration),
AvailabilityMode: o.GetAvailabilityMode(),
ScopeType: cms.NodeScope,
TaskUID: taskUID,
})
} else {
task, err = f.GetCMSClient().CreateMaintenanceTask(cms.MaintenanceTaskParams{
Hosts: hostFQDNs,
Duration: durationpb.New(duration),
AvailabilityMode: o.GetAvailabilityMode(),
ScopeType: cms.HostScope,
TaskUID: taskUID,
})
}

if err != nil {
return err
}

fmt.Printf(
"Your task id is:\n\n%s\n\nPlease write it down for refreshing and completing the task later.\n",
taskId.GetTaskUid(),
task.GetTaskUid(),
)

fmt.Println(prettyprint.TaskToString(task))

return nil
}
15 changes: 3 additions & 12 deletions cmd/maintenance/maintenance.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,34 +10,25 @@ import (
"github.com/ydb-platform/ydbops/cmd/maintenance/refresh"
"github.com/ydb-platform/ydbops/pkg/cli"
"github.com/ydb-platform/ydbops/pkg/cmdutil"
"github.com/ydb-platform/ydbops/pkg/command"
)

type Options struct {
*command.BaseOptions
}

func New(f cmdutil.Factory) *cobra.Command {
options := &Options{}
c := cli.SetDefaultsOn(&cobra.Command{
cmd := cli.SetDefaultsOn(&cobra.Command{
Use: "maintenance",
Short: "Request hosts from the Cluster Management System",
Long: `ydbops maintenance [command]:
Manage host maintenance operations: request and return hosts
with performed maintenance back to the cluster.`,
PreRunE: cli.PopulateProfileDefaultsAndValidate(
options.BaseOptions, options,
),
RunE: cli.RequireSubcommand,
})

c.AddCommand(
cmd.AddCommand(
complete.New(f),
create.New(f),
drop.New(f),
list.New(f),
refresh.New(f),
)

return c
return cmd
}
61 changes: 37 additions & 24 deletions cmd/restart/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,44 +20,57 @@ func (o *Options) DefineFlags(fs *pflag.FlagSet) {
o.RestartOptions.DefineFlags(fs)
}

func (o *Options) Run(f cmdutil.Factory) error {
var storageRestarter restarters.Restarter
var tenantRestarter restarters.Restarter

if o.RestartOptions.KubeconfigPath != "" {
storageRestarter = restarters.NewStorageK8sRestarter(
func PrepareRestarters(
opts *options.FilteringOptions,
sshArgs []string,
customSystemdUnitName string,
restartDuration int,
) (storage, tenant restarters.Restarter) {
if opts.KubeconfigPath != "" {
storage = restarters.NewStorageK8sRestarter(
options.Logger,
&restarters.StorageK8sRestarterOptions{
K8sRestarterOptions: &restarters.K8sRestarterOptions{
KubeconfigPath: o.RestartOptions.KubeconfigPath,
Namespace: o.RestartOptions.K8sNamespace,
RestartDuration: time.Duration(o.RestartOptions.RestartDuration) * time.Second,
KubeconfigPath: opts.KubeconfigPath,
Namespace: opts.K8sNamespace,
RestartDuration: time.Duration(restartDuration) * time.Second,
},
},
)
tenantRestarter = restarters.NewTenantK8sRestarter(
tenant = restarters.NewTenantK8sRestarter(
options.Logger,
&restarters.TenantK8sRestarterOptions{
K8sRestarterOptions: &restarters.K8sRestarterOptions{
KubeconfigPath: o.RestartOptions.KubeconfigPath,
Namespace: o.RestartOptions.K8sNamespace,
RestartDuration: time.Duration(o.RestartOptions.RestartDuration) * time.Second,
KubeconfigPath: opts.KubeconfigPath,
Namespace: opts.K8sNamespace,
RestartDuration: time.Duration(restartDuration) * time.Second,
},
},
)
} else {
storageRestarter = restarters.NewStorageSSHRestarter(
options.Logger,
o.RestartOptions.SSHArgs,
o.RestartOptions.CustomSystemdUnitName,
)
tenantRestarter = restarters.NewTenantSSHRestarter(
options.Logger,
o.RestartOptions.SSHArgs,
o.RestartOptions.CustomSystemdUnitName,
)
return storage, tenant
}

storage = restarters.NewStorageSSHRestarter(
options.Logger,
sshArgs,
customSystemdUnitName,
)
tenant = restarters.NewTenantSSHRestarter(
options.Logger,
sshArgs,
customSystemdUnitName,
)
return storage, tenant
}

func (o *Options) Run(f cmdutil.Factory) error {
storageRestarter, tenantRestarter := PrepareRestarters(
&o.FilteringOptions,
o.SSHArgs,
o.CustomSystemdUnitName,
o.RestartDuration,
)

bothUnspecified := !o.RestartOptions.Storage && !o.RestartOptions.Tenant

var executer rolling.Executer
Expand Down
1 change: 1 addition & 0 deletions pkg/client/cms/cms.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ func (c *defaultCMSClient) CreateMaintenanceTask(params MaintenanceTaskParams) (
},
}

fmt.Println(params.Duration)
if params.ScopeType == NodeScope {
request.ActionGroups = actionGroupsFromNodes(params)
} else { // HostScope
Expand Down
Loading
Loading