From 5ae894ecc8ee362e44e470fbc0ccbe0353260bf8 Mon Sep 17 00:00:00 2001 From: Caleb Date: Sat, 15 Feb 2025 11:19:20 -0500 Subject: [PATCH 1/8] convert failover.* to new API --- .../middlewared/api/v25_10_0/failover.py | 103 +++++++++++ .../middlewared/plugins/failover.py | 168 +++++++++--------- 2 files changed, 190 insertions(+), 81 deletions(-) create mode 100644 src/middlewared/middlewared/api/v25_10_0/failover.py diff --git a/src/middlewared/middlewared/api/v25_10_0/failover.py b/src/middlewared/middlewared/api/v25_10_0/failover.py new file mode 100644 index 0000000000000..1fb1055ef284c --- /dev/null +++ b/src/middlewared/middlewared/api/v25_10_0/failover.py @@ -0,0 +1,103 @@ +from middlewared.api.base import BaseModel, NotRequired, single_argument_args + +from pydantic import HttpUrl + + +class FailoverGetIpsArgs(BaseModel): + pass + + +class FailoverGetIpsResult(BaseModel): + result: list[HttpUrl] + + +class FailoverBecomePassiveArgs(BaseModel): + pass + + +class FailoverBecomePassiveResult(BaseModel): + result: None + + +class FailoverLicensedArgs(BaseModel): + pass + + +class FailoverLicensedResult(BaseModel): + result: bool + + +class FailoverNodeArgs(BaseModel): + pass + + +class FailoverNodeResult(BaseModel): + result: str + + +class FailoverStatusArgs(BaseModel): + pass + + +class FailoverStatusResult(BaseModel): + result: str + + +class FailoverSyncFromPeerArgs(BaseModel): + pass + + +class FailoverSyncFromPeerResult(BaseModel): + result: str + +@single_argument_args("sync_to_peer") +class FailoverSyncToPeerArgs(BaseModel): + reboot: bool = False + """If set to True, will reboot the other controller.""" + + +class FailoverSyncToPeerResult(BaseModel): + result: None + + +class FailoverUpdateEntry(BaseModel): + id: int + disabled: bool + master: bool + timeout: int + + +@single_argument_args("failover_update") +class FailoverUpdateArgs(BaseModel): + disabled: bool + """When true HA will be administratively disabled.""" + master: bool = NotRequired + """Marks the particular node in the chassis as the master node. + The standby node will have the opposite value.""" + timeout: int + """The time to WAIT (in seconds) until a failover occurs when a network + event occurs on an interface that is marked critical for failover AND + HA is enabled and working appropriately. The default time to wait is + 2 seconds. + + **NOTE** + This setting does NOT effect the `disabled` or `master` parameters.""" + + +class FailoverUpdateResult(BaseModel): + result: FailoverUpdateEntry + + +class FailoverUpgradeArgs(BaseModel): + train: str = NotRequired + resume: bool = False + """Should be set to true if a previous call to this method returned a + `CallError` with `errno=EAGAIN` meaning that an upgrade can be performed + with a warning and that warning is accepted. In that case, you also have + to set `resume_manual` to `true` if a previous call to this method was + performed using update file upload.""" + resume_manual: bool = False + + +class FailoverUpgradeResult(BaseModel): + result: bool diff --git a/src/middlewared/middlewared/plugins/failover.py b/src/middlewared/middlewared/plugins/failover.py index bb362240d8ff0..a581c5abd1e21 100644 --- a/src/middlewared/middlewared/plugins/failover.py +++ b/src/middlewared/middlewared/plugins/failover.py @@ -10,11 +10,37 @@ import time from functools import partial +from middlewared.api import api_method +from middlewared.api.current import ( + FailoverBecomePassiveArgs, + FailoverBecomePassiveResult, + FailoverGetIpsArgs, + FailoverGetIpsResult, + FailoverLicensedArgs, + FailoverLicensedResult, + FailoverNodeArgs, + FailoverNodeResult, + FailoverStatusArgs, + FailoverStatusResult, + FailoverSyncFromPeerArgs, + FailoverSyncFromPeerResult, + FailoverSyncToPeerArgs, + FailoverSyncToPeerResult, + FailoverUpdateArgs, + FailoverUpdateEntry, + FailoverUpdateResult, + FailoverUpgradeArgs, + FailoverUpgradeResult, +) from middlewared.auth import TruenasNodeSessionManagerCredentials -from middlewared.schema import accepts, Bool, Dict, Int, List, NOT_PROVIDED, Str, returns, Patch +from middlewared.schema import NOT_PROVIDED from middlewared.service import ( - job, no_authz_required, pass_app, private, CallError, ConfigService, - ValidationError, ValidationErrors + job, + private, + CallError, + ConfigService, + ValidationError, + ValidationErrors ) import middlewared.sqlalchemy as sa from middlewared.plugins.auth import AuthService @@ -53,49 +79,24 @@ class Config: datastore_extend = 'failover.failover_extend' cli_private = True role_prefix = 'FAILOVER' - - ENTRY = Dict( - 'failover_entry', - Int('id', required=True), - Bool('disabled', required=True), - Int('timeout', required=True), - Bool('master', required=True), - ) + entry = FailoverUpdateEntry @private async def failover_extend(self, data): data['master'] = await self.middleware.call('failover.node') == data.pop('master_node') return data - @accepts(Patch( - 'failover_entry', 'failover_update', - ('edit', {'name': 'master', 'method': lambda x: setattr(x, 'null', True)}), - ('rm', {'name': 'id'}), - ('attr', {'update': True}), - ), audit='Failover config update') + @api_method( + FailoverUpdateArgs, + FailoverUpdateResult, + audit='Failover config update', + ) async def do_update(self, data): - """ - Update failover state. - - `disabled` When true indicates that HA will be disabled. - `master` Marks the particular node in the chassis as the master node. - The standby node will have the opposite value. - - `timeout` is the time to WAIT until a failover occurs when a network - event occurs on an interface that is marked critical for failover AND - HA is enabled and working appropriately. - - The default time to wait is 2 seconds. - **NOTE** - This setting does NOT effect the `disabled` or `master` parameters. - """ + """Update failover configuration.""" master = data.pop('master', NOT_PROVIDED) - old = await self.middleware.call('datastore.config', 'system.failover') - new = old.copy() new.update(data) - if master is not NOT_PROVIDED: # The node making the call is the one we want to make MASTER by default new['master_node'] = await self.middleware.call('failover.node') @@ -136,9 +137,11 @@ async def _master_node(self, master): else: raise CallError('Unable to change node state in MANUAL mode') - @no_authz_required - @accepts() - @returns(Bool()) + @api_method( + FailoverLicensedArgs, + FailoverLicensedResult, + authorization_required=False, + ) def licensed(self): """Checks whether this instance is licensed as a HA unit""" try: @@ -171,8 +174,11 @@ async def hardware(self): """ return (await self.ha_mode())[0] - @accepts(roles=['FAILOVER_READ']) - @returns(Str()) + @api_method( + FailoverNodeArgs, + FailoverNodeResult, + roles=['FAILOVER_READ'] + ) async def node(self): """ Returns the slot position in the chassis that @@ -191,9 +197,12 @@ async def internal_interfaces(self): ints = await self.middleware.call('failover.internal_interface.detect') return list(ints) - @accepts(roles=['FAILOVER_READ']) - @returns(Str()) - @pass_app(rest=True) + @api_method( + FailoverStatusArgs, + FailoverStatusResult, + pass_app=True, + roles=['FAILOVER_READ'] + ) async def status(self, app): """ Get the current HA status. @@ -268,15 +277,21 @@ def in_progress(self): ) return bool(event) - @accepts(roles=['FAILOVER_READ']) - @returns(List('ips', items=[Str('ip')])) - @pass_app(rest=True) - async def get_ips(self, app): + @api_method( + FailoverGetIpsArgs, + FailoverGetIpsResult, + roles=['FAILOVER_READ'] + ) + async def get_ips(self): """Get a list of IPs for which the webUI can be accessed.""" return await self.middleware.call('system.general.get_ui_urls') - @accepts(audit='Failover become passive', roles=['FAILOVER_WRITE']) - @returns() + @api_method( + FailoverBecomePassiveArgs, + FailoverBecomePassiveResult, + audit='Failover become passive', + roles=['FAILOVER_WRITE'] + ) def become_passive(self): """ This method is only called manually by the end-user so we fully expect that they @@ -333,11 +348,11 @@ async def force_master(self): rc = await self.middleware.call('failover.fenced.start', True) return not rc if rc != 6 else bool(rc) # 6 means already running - @accepts(Dict( - 'options', - Bool('reboot', default=False), - ), roles=['FAILOVER_WRITE']) - @returns() + @api_method( + FailoverSyncToPeerArgs, + FailoverSyncToPeerResult, + roles=['FAILOVER_WRITE'], + ) def sync_to_peer(self, options): """ Sync database and files to the other controller. @@ -379,8 +394,11 @@ def sync_to_peer(self, options): if options['reboot']: self.middleware.call_sync('failover.call_remote', 'system.reboot', ['Failover sync to peer', {'delay': 2}]) - @accepts(roles=['FAILOVER_WRITE']) - @returns() + @api_method( + FailoverSyncFromPeerArgs, + FailoverSyncFromPeerResult, + roles=['FAILOVER_WRITE'], + ) def sync_from_peer(self): """ Sync database and files from the other controller. @@ -577,31 +595,20 @@ async def is_single_master_node(self): def upgrade_version(self): return 1 - @accepts(Dict( - 'failover_upgrade', - Str('train', empty=False), - Bool('resume', default=False), - Bool('resume_manual', default=False), - ), roles=['FAILOVER_WRITE'], audit='Failover upgrade') - @returns(Bool()) + @api_method( + FailoverUpgradeArgs, + FailoverUpgradeResult, + roles=['FAILOVER_WRITE'], + audit='Failover upgrade', + ) @job(lock='failover_upgrade', pipes=['input'], check_pipes=False) def upgrade(self, job, options): + """Upgrades both controllers. Files will be downloaded to the + Active Controller and then transferred to the Standby Controller. + Upgrade process will start concurrently on both nodes. Once both + upgrades are applied, the Standby Controller will reboot. This + job will wait for that job to complete before finalizing. """ - Upgrades both controllers. - - Files will be downloaded to the Active Controller and then transferred to the Standby - Controller. - - Upgrade process will start concurrently on both nodes. - - Once both upgrades are applied, the Standby Controller will reboot. This job will wait for - that job to complete before finalizing. - - `resume` should be set to `true` if a previous call to this method returned a `CallError` with `errno=EAGAIN` - meaning that an upgrade can be performed with a warning and that warning is accepted. In that case, you also - have to set `resume_manual` to `true` if a previous call to this method was performed using update file upload. - """ - if self.middleware.call_sync('failover.status') != 'MASTER': raise CallError('Upgrade can only run on Active Controller.') @@ -615,12 +622,11 @@ def upgrade(self, job, options): else: updatefile = options['resume_manual'] - train = options.get('train') - if train: + train = options.get('train', NOT_PROVIDED) + if train is not NOT_PROVIDED: self.middleware.call_sync('update.set_train', train) local_path = self.middleware.call_sync('update.get_update_location') - updatefile_name = 'updatefile.sqsh' updatefile_localpath = os.path.join(local_path, updatefile_name) if not options['resume'] and updatefile: From 43b7203e6039423c8ae3469c5fa45baf2659cf77 Mon Sep 17 00:00:00 2001 From: Caleb Date: Sat, 15 Feb 2025 11:23:23 -0500 Subject: [PATCH 2/8] train should be non-empty, if provided --- src/middlewared/middlewared/api/v25_10_0/failover.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/middlewared/middlewared/api/v25_10_0/failover.py b/src/middlewared/middlewared/api/v25_10_0/failover.py index 1fb1055ef284c..946ea4104757e 100644 --- a/src/middlewared/middlewared/api/v25_10_0/failover.py +++ b/src/middlewared/middlewared/api/v25_10_0/failover.py @@ -1,4 +1,4 @@ -from middlewared.api.base import BaseModel, NotRequired, single_argument_args +from middlewared.api.base import BaseModel, NonEmptyString, NotRequired, single_argument_args from pydantic import HttpUrl @@ -89,7 +89,7 @@ class FailoverUpdateResult(BaseModel): class FailoverUpgradeArgs(BaseModel): - train: str = NotRequired + train: NonEmptyString = NotRequired resume: bool = False """Should be set to true if a previous call to this method returned a `CallError` with `errno=EAGAIN` meaning that an upgrade can be performed From 0116a93927b39611e6fe5daf2b3c4acf1afc9b2c Mon Sep 17 00:00:00 2001 From: Caleb Date: Sat, 15 Feb 2025 11:33:24 -0500 Subject: [PATCH 3/8] missing import --- src/middlewared/middlewared/api/v25_10_0/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/middlewared/middlewared/api/v25_10_0/__init__.py b/src/middlewared/middlewared/api/v25_10_0/__init__.py index d0366f73c6fdf..047b97e56fddd 100644 --- a/src/middlewared/middlewared/api/v25_10_0/__init__.py +++ b/src/middlewared/middlewared/api/v25_10_0/__init__.py @@ -29,6 +29,7 @@ from .docker_network import * # noqa from .enclosure2 import * # noqa from .enclosure_label import * # noqa +from .failover import * # noqa from .failover_reboot import * # noqa from .fc import * # noqa from .fc_host import * # noqa From 5bfd7d7503fedc9b1cb0e2eba47bd796627b2dd9 Mon Sep 17 00:00:00 2001 From: Caleb Date: Sun, 16 Feb 2025 07:39:06 -0500 Subject: [PATCH 4/8] timeout isn't required --- src/middlewared/middlewared/api/v25_10_0/failover.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/middlewared/middlewared/api/v25_10_0/failover.py b/src/middlewared/middlewared/api/v25_10_0/failover.py index 946ea4104757e..95d8080018055 100644 --- a/src/middlewared/middlewared/api/v25_10_0/failover.py +++ b/src/middlewared/middlewared/api/v25_10_0/failover.py @@ -74,7 +74,7 @@ class FailoverUpdateArgs(BaseModel): master: bool = NotRequired """Marks the particular node in the chassis as the master node. The standby node will have the opposite value.""" - timeout: int + timeout: int = NotRequired """The time to WAIT (in seconds) until a failover occurs when a network event occurs on an interface that is marked critical for failover AND HA is enabled and working appropriately. The default time to wait is From 1657e2d59569b132f7791fe69a63fe915df009f4 Mon Sep 17 00:00:00 2001 From: caleb Date: Tue, 18 Feb 2025 12:48:37 -0500 Subject: [PATCH 5/8] address review --- .../middlewared/api/v25_10_0/failover.py | 68 +++++++++++-------- .../middlewared/plugins/failover.py | 4 +- 2 files changed, 40 insertions(+), 32 deletions(-) diff --git a/src/middlewared/middlewared/api/v25_10_0/failover.py b/src/middlewared/middlewared/api/v25_10_0/failover.py index 95d8080018055..888316fe2f655 100644 --- a/src/middlewared/middlewared/api/v25_10_0/failover.py +++ b/src/middlewared/middlewared/api/v25_10_0/failover.py @@ -1,8 +1,42 @@ -from middlewared.api.base import BaseModel, NonEmptyString, NotRequired, single_argument_args +from middlewared.api.base import ( + BaseModel, + Excluded, + excluded_field, + ForUpdateMetaclass, + NonEmptyString, + NotRequired, + single_argument_args +) from pydantic import HttpUrl +class FailoverEntry(BaseModel): + id: int + disabled: bool + """When true HA will be administratively disabled.""" + master: bool + """Marks the particular node in the chassis as the master node. + The standby node will have the opposite value.""" + timeout: int + """The time to WAIT (in seconds) until a failover occurs when a network + event occurs on an interface that is marked critical for failover AND + HA is enabled and working appropriately. The default time to wait is + 2 seconds. + + **NOTE** + This setting does NOT effect the `disabled` or `master` parameters.""" + + +class FailoverUpdateArgs(FailoverEntry, metaclass=ForUpdateMetaclass): + id: Excluded = excluded_field() + master: bool | None + + +class FailoverUpdateResult(BaseModel): + result: FailoverEntry + + class FailoverGetIpsArgs(BaseModel): pass @@ -48,7 +82,8 @@ class FailoverSyncFromPeerArgs(BaseModel): class FailoverSyncFromPeerResult(BaseModel): - result: str + result: None + @single_argument_args("sync_to_peer") class FailoverSyncToPeerArgs(BaseModel): @@ -60,34 +95,7 @@ class FailoverSyncToPeerResult(BaseModel): result: None -class FailoverUpdateEntry(BaseModel): - id: int - disabled: bool - master: bool - timeout: int - - -@single_argument_args("failover_update") -class FailoverUpdateArgs(BaseModel): - disabled: bool - """When true HA will be administratively disabled.""" - master: bool = NotRequired - """Marks the particular node in the chassis as the master node. - The standby node will have the opposite value.""" - timeout: int = NotRequired - """The time to WAIT (in seconds) until a failover occurs when a network - event occurs on an interface that is marked critical for failover AND - HA is enabled and working appropriately. The default time to wait is - 2 seconds. - - **NOTE** - This setting does NOT effect the `disabled` or `master` parameters.""" - - -class FailoverUpdateResult(BaseModel): - result: FailoverUpdateEntry - - +@single_argument_args("failover_upgrade") class FailoverUpgradeArgs(BaseModel): train: NonEmptyString = NotRequired resume: bool = False diff --git a/src/middlewared/middlewared/plugins/failover.py b/src/middlewared/middlewared/plugins/failover.py index a581c5abd1e21..a0f35e4409fa8 100644 --- a/src/middlewared/middlewared/plugins/failover.py +++ b/src/middlewared/middlewared/plugins/failover.py @@ -14,6 +14,7 @@ from middlewared.api.current import ( FailoverBecomePassiveArgs, FailoverBecomePassiveResult, + FailoverEntry, FailoverGetIpsArgs, FailoverGetIpsResult, FailoverLicensedArgs, @@ -27,7 +28,6 @@ FailoverSyncToPeerArgs, FailoverSyncToPeerResult, FailoverUpdateArgs, - FailoverUpdateEntry, FailoverUpdateResult, FailoverUpgradeArgs, FailoverUpgradeResult, @@ -79,7 +79,7 @@ class Config: datastore_extend = 'failover.failover_extend' cli_private = True role_prefix = 'FAILOVER' - entry = FailoverUpdateEntry + entry = FailoverEntry @private async def failover_extend(self, data): From da03d4acb84b5739f7b634bb2f632e87c7178d7c Mon Sep 17 00:00:00 2001 From: Logan Cary Date: Tue, 18 Feb 2025 15:41:38 -0500 Subject: [PATCH 6/8] use single_argument_args --- src/middlewared/middlewared/api/v25_10_0/failover.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/middlewared/middlewared/api/v25_10_0/failover.py b/src/middlewared/middlewared/api/v25_10_0/failover.py index 888316fe2f655..4af62e4bc3787 100644 --- a/src/middlewared/middlewared/api/v25_10_0/failover.py +++ b/src/middlewared/middlewared/api/v25_10_0/failover.py @@ -1,3 +1,5 @@ +from pydantic import HttpUrl + from middlewared.api.base import ( BaseModel, Excluded, @@ -8,8 +10,6 @@ single_argument_args ) -from pydantic import HttpUrl - class FailoverEntry(BaseModel): id: int @@ -28,6 +28,7 @@ class FailoverEntry(BaseModel): This setting does NOT effect the `disabled` or `master` parameters.""" +@single_argument_args("failover_update") class FailoverUpdateArgs(FailoverEntry, metaclass=ForUpdateMetaclass): id: Excluded = excluded_field() master: bool | None From 35ec09b8acadb194600a186199f7e78e963b023f Mon Sep 17 00:00:00 2001 From: Logan Cary Date: Wed, 19 Feb 2025 10:09:50 -0500 Subject: [PATCH 7/8] don't accept None for master --- src/middlewared/middlewared/api/v25_10_0/failover.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/middlewared/middlewared/api/v25_10_0/failover.py b/src/middlewared/middlewared/api/v25_10_0/failover.py index 4af62e4bc3787..4f9f36e604cba 100644 --- a/src/middlewared/middlewared/api/v25_10_0/failover.py +++ b/src/middlewared/middlewared/api/v25_10_0/failover.py @@ -31,7 +31,6 @@ class FailoverEntry(BaseModel): @single_argument_args("failover_update") class FailoverUpdateArgs(FailoverEntry, metaclass=ForUpdateMetaclass): id: Excluded = excluded_field() - master: bool | None class FailoverUpdateResult(BaseModel): From 1a6f0cbb3283f44f90d8ddea5eae89f222cf74a8 Mon Sep 17 00:00:00 2001 From: Logan Cary Date: Wed, 19 Feb 2025 11:18:01 -0500 Subject: [PATCH 8/8] more defaults --- .../middlewared/api/v25_10_0/failover.py | 56 +++++++++++-------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/src/middlewared/middlewared/api/v25_10_0/failover.py b/src/middlewared/middlewared/api/v25_10_0/failover.py index 4f9f36e604cba..c70f6188a993a 100644 --- a/src/middlewared/middlewared/api/v25_10_0/failover.py +++ b/src/middlewared/middlewared/api/v25_10_0/failover.py @@ -1,4 +1,4 @@ -from pydantic import HttpUrl +from pydantic import Field from middlewared.api.base import ( BaseModel, @@ -7,7 +7,6 @@ ForUpdateMetaclass, NonEmptyString, NotRequired, - single_argument_args ) @@ -28,29 +27,40 @@ class FailoverEntry(BaseModel): This setting does NOT effect the `disabled` or `master` parameters.""" -@single_argument_args("failover_update") -class FailoverUpdateArgs(FailoverEntry, metaclass=ForUpdateMetaclass): +class FailoverSyncToPeer(BaseModel): + reboot: bool = False + """If set to True, will reboot the other controller.""" + + +class FailoverUpdate(FailoverEntry, metaclass=ForUpdateMetaclass): id: Excluded = excluded_field() -class FailoverUpdateResult(BaseModel): - result: FailoverEntry +class FailoverUpgrade(BaseModel): + train: NonEmptyString = NotRequired + resume: bool = False + """Should be set to true if a previous call to this method returned a + `CallError` with `errno=EAGAIN` meaning that an upgrade can be performed + with a warning and that warning is accepted. In that case, you also have + to set `resume_manual` to `true` if a previous call to this method was + performed using update file upload.""" + resume_manual: bool = False -class FailoverGetIpsArgs(BaseModel): +class FailoverBecomePassiveArgs(BaseModel): pass -class FailoverGetIpsResult(BaseModel): - result: list[HttpUrl] +class FailoverBecomePassiveResult(BaseModel): + result: None -class FailoverBecomePassiveArgs(BaseModel): +class FailoverGetIpsArgs(BaseModel): pass -class FailoverBecomePassiveResult(BaseModel): - result: None +class FailoverGetIpsResult(BaseModel): + result: list[str] class FailoverLicensedArgs(BaseModel): @@ -85,26 +95,24 @@ class FailoverSyncFromPeerResult(BaseModel): result: None -@single_argument_args("sync_to_peer") class FailoverSyncToPeerArgs(BaseModel): - reboot: bool = False - """If set to True, will reboot the other controller.""" + options: FailoverSyncToPeer = Field(default_factory=FailoverSyncToPeer) class FailoverSyncToPeerResult(BaseModel): result: None -@single_argument_args("failover_upgrade") +class FailoverUpdateArgs(BaseModel): + data: FailoverUpdate + + +class FailoverUpdateResult(BaseModel): + result: FailoverEntry + + class FailoverUpgradeArgs(BaseModel): - train: NonEmptyString = NotRequired - resume: bool = False - """Should be set to true if a previous call to this method returned a - `CallError` with `errno=EAGAIN` meaning that an upgrade can be performed - with a warning and that warning is accepted. In that case, you also have - to set `resume_manual` to `true` if a previous call to this method was - performed using update file upload.""" - resume_manual: bool = False + failover_upgrade: FailoverUpgrade = Field(default_factory=FailoverUpgrade) class FailoverUpgradeResult(BaseModel):