Skip to content

Commit

Permalink
Split Terminating into (soft) Terminating and HardTerminating (#…
Browse files Browse the repository at this point in the history
…791)

Currently, we use `Terminating` to represent both soft and hard
terminating. If a backend is hard-terminated while it is being
soft-terminated (which is allowed), a backend goes from a `Terminating`
state to another `Terminating` state. This is the only allowed state
transition that does not move forward in the state sequence.

The change in this PR is to split soft- and hard- `Terminating` states
into their own separate states. Soft-termination keeps the name
`Terminating`, and hard termination gets `HardTerminating`. The field
`termination` in `Terminating` remains, but is deprecated, and can only
be set to `Soft` in the constructor.
  • Loading branch information
paulgb authored Jul 24, 2024
1 parent 84e124c commit d9d16d9
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 86 deletions.
6 changes: 4 additions & 2 deletions docs/pages/concepts/backend-lifecycle.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ The statuses are:
- `starting`: The drone has loaded the image and is starting the container.
- `waiting`: The container has started. The drone is waiting for it to listen on an HTTP port.
- `ready`: The container is listening on an HTTP port. The drone is ready to route traffic to it.
- `terminating`: The drone has sent a request to terminate the backend. If the request was a “soft” request,
the backend may remain in this state for a grace period (by default 10 seconds) before being hard-terminated.
- `terminating`: The drone has sent a “soft” request to terminate the backend.
The backend may remain in this state for a grace period (by default 10 seconds) before being hard-terminated,
unless it exits on its own first.
- `hard-terminating`: The drone has sent a “hard” request to terminate the backend.
- `terminated`: The drone has terminated the backend. This is considered the only terminal state.

A backend may skip over some of these statuses, but will only transition to statuses lower in the list, never
Expand Down
4 changes: 3 additions & 1 deletion plane/src/controller/proxy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,9 @@ pub async fn handle_route_info_request(
}
break;
}
BackendState::Terminated { .. } | BackendState::Terminating { .. } => {
BackendState::Terminated { .. }
| BackendState::Terminating { .. }
| BackendState::HardTerminating { .. } => {
let response = RouteInfoResponse {
token,
route_info: None,
Expand Down
4 changes: 2 additions & 2 deletions plane/src/database/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ impl<'a> BackendDatabase<'a> {

let ready = match result.last_status.as_str() {
"ready" => true,
"terminated" | "terminating" => {
"terminated" | "terminating" | "hard-terminating" => {
return Ok(RouteInfoResult::NotFound);
}
_ => false,
Expand Down Expand Up @@ -392,7 +392,7 @@ impl<'a> BackendDatabase<'a> {

let ready = match result.last_status.as_str() {
"ready" => true,
"terminated" | "terminating" => {
"terminated" | "terminating" | "hard-terminating" => {
return Ok(RouteInfoResult::NotFound);
}
_ => false,
Expand Down
70 changes: 40 additions & 30 deletions plane/src/drone/backend_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,34 @@ impl Debug for BackendManager {
}
}

/// Builds the step result for a backend that is in a terminating state.
///
/// The backend name is cloned so the returned future owns everything it needs.
/// That future calls `runtime.terminate(&backend_id, hard_terminate)` until the
/// call succeeds, then acts on the returned flag:
///
/// * `Ok(false)`: resolves to `state.to_terminated(None)`.
/// * `Ok(true)`: never resolves, so that only the container terminating bumps
///   the backend into its next state.
/// * `Err(_)`: the error is logged and the call is retried after waiting on an
///   `ExponentialBackoff`.
///
/// `hard_terminate` is forwarded to the runtime unchanged.
fn handle_terminating(
    runtime: Arc<Box<dyn Runtime>>,
    backend_id: &BackendName,
    state: BackendState,
    hard_terminate: bool,
) -> StepStatusResult {
    let owned_id = backend_id.clone();

    let termination = async move {
        let mut retry_delay = ExponentialBackoff::default();

        // Keep asking the runtime to terminate until it answers without an error.
        let keep_waiting = loop {
            match runtime.terminate(&owned_id, hard_terminate).await {
                Ok(flag) => break flag,
                Err(err) => {
                    tracing::error!(?err, "failed to terminate backend");
                    retry_delay.wait().await;
                }
            }
        };

        if keep_waiting {
            // Return a future that never resolves, so that only the container
            // terminating bumps us into the next state.
            pending().await
        } else {
            state.to_terminated(None)
        }
    };

    StepStatusResult::future_status(termination)
}

impl BackendManager {
#[allow(clippy::too_many_arguments)]
pub fn new(
Expand Down Expand Up @@ -173,41 +201,18 @@ impl BackendManager {
runtime.wait_for_backend(&backend_id, address.0).await
{
tracing::error!("Backend startup timeout");
state.to_terminating(
TerminationKind::Hard,
TerminationReason::StartupTimeout,
)
state.to_hard_terminating(TerminationReason::StartupTimeout)
} else {
state.to_ready(address)
}
})
}
BackendState::Ready { .. } => StepStatusResult::DoNothing,
BackendState::Terminating { termination, .. } => {
let docker = self.runtime.clone();
let backend_id = self.backend_id.clone();

StepStatusResult::future_status(async move {
let mut backoff = ExponentialBackoff::default();

loop {
match docker
.terminate(&backend_id, termination == TerminationKind::Hard)
.await
{
Ok(false) => return state.to_terminated(None),
Ok(true) => {
// Return a future that never resolves, so that only the container
// terminating bumps us into the next state.
return pending().await;
}
Err(err) => {
tracing::error!(?err, "failed to terminate backend");
backoff.wait().await;
}
}
}
})
BackendState::Terminating { .. } => {
handle_terminating(self.runtime.clone(), &self.backend_id, state, false)
}
BackendState::HardTerminating { .. } => {
handle_terminating(self.runtime.clone(), &self.backend_id, state, true)
}
BackendState::Terminated { .. } => StepStatusResult::DoNothing,
}
Expand Down Expand Up @@ -262,7 +267,12 @@ impl BackendManager {
.expect("State lock is poisoned")
.state
.clone();
self.set_state(state.to_terminating(kind, reason));

let new_state = match kind {
TerminationKind::Soft => state.to_terminating(reason),
TerminationKind::Hard => state.to_hard_terminating(reason),
};
self.set_state(new_state);

Ok(())
}
Expand Down
4 changes: 2 additions & 2 deletions plane/src/drone/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::{
drone::runtime::Runtime,
names::BackendName,
protocol::{BackendAction, BackendEventId, BackendStateMessage},
types::{BackendState, BackendStatus, TerminationKind, TerminationReason},
types::{BackendState, BackendStatus, TerminationReason},
util::{ExponentialBackoff, GuardHandle},
};
use anyhow::Result;
Expand Down Expand Up @@ -93,7 +93,7 @@ impl Executor {
.expect("State store lock poisoned.")
.register_event(
&backend_id,
&state.to_terminating(TerminationKind::Hard, TerminationReason::KeyExpired),
&state.to_hard_terminating(TerminationReason::KeyExpired),
Utc::now(),
)
.unwrap_or_else(|_| {
Expand Down
25 changes: 10 additions & 15 deletions plane/src/drone/state_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ mod test {
use crate::{
log_types::BackendAddr,
names::Name,
types::{BackendStatus, TerminationKind, TerminationReason},
types::{BackendStatus, TerminationReason},
};
use std::{
net::{SocketAddr, SocketAddrV4},
Expand Down Expand Up @@ -300,17 +300,16 @@ mod test {
state_store
.register_event(
&backend_id,
&ready_state.to_terminating(TerminationKind::Hard, TerminationReason::External),
&ready_state.to_hard_terminating(TerminationReason::External),
Utc::now(),
)
.unwrap();

let result = state_store.backend_state(&backend_id).unwrap();
assert_eq!(
result,
BackendState::Terminating {
BackendState::HardTerminating {
last_status: BackendStatus::Ready,
termination: TerminationKind::Hard,
reason: TerminationReason::External,
}
);
Expand Down Expand Up @@ -357,24 +356,23 @@ mod test {
state_store
.register_event(
&backend_id,
&ready_state.to_terminating(TerminationKind::Hard, TerminationReason::Swept),
&ready_state.to_hard_terminating(TerminationReason::Swept),
Utc::now(),
)
.unwrap();

let result = state_store.backend_state(&backend_id).unwrap();
assert_eq!(
result,
ready_state.to_terminating(TerminationKind::Hard, TerminationReason::Swept)
ready_state.to_hard_terminating(TerminationReason::Swept)
);

let event = recv.try_recv().unwrap();
assert_eq!(event.backend_id, backend_id);
assert_eq!(
event.state,
BackendState::Terminating {
BackendState::HardTerminating {
last_status: BackendStatus::Ready,
termination: TerminationKind::Hard,
reason: TerminationReason::Swept,
}
);
Expand All @@ -400,7 +398,7 @@ mod test {
state_store
.register_event(
&backend_id,
&ready_state.to_terminating(TerminationKind::Hard, TerminationReason::Swept),
&ready_state.to_hard_terminating(TerminationReason::Swept),
Utc::now(),
)
.unwrap();
Expand Down Expand Up @@ -429,9 +427,8 @@ mod test {
assert_eq!(event.event_id, BackendEventId::from(2));
assert_eq!(
event.state,
BackendState::Terminating {
BackendState::HardTerminating {
last_status: BackendStatus::Ready,
termination: TerminationKind::Hard,
reason: TerminationReason::Swept,
}
);
Expand Down Expand Up @@ -463,9 +460,8 @@ mod test {
assert_eq!(event.backend_id, backend_id);
assert_eq!(
event.state,
BackendState::Terminating {
BackendState::HardTerminating {
last_status: BackendStatus::Ready,
termination: TerminationKind::Hard,
reason: TerminationReason::Swept,
}
);
Expand All @@ -489,9 +485,8 @@ mod test {
assert_eq!(event.backend_id, backend_id);
assert_eq!(
event.state,
BackendState::Terminating {
BackendState::HardTerminating {
last_status: BackendStatus::Ready,
termination: TerminationKind::Hard,
reason: TerminationReason::Swept,
}
);
Expand Down
2 changes: 1 addition & 1 deletion plane/src/log_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ impl From<OffsetDateTime> for LoggableTime {
}
}

#[derive(Clone, Copy, Serialize, Deserialize, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Serialize, Deserialize, Debug, PartialEq, Eq, PartialOrd)]
/// Newtype wrapper around a [`SocketAddr`]; the wrapped address is the public field `0`.
pub struct BackendAddr(pub SocketAddr);

impl valuable::Valuable for BackendAddr {
Expand Down
Loading

0 comments on commit d9d16d9

Please sign in to comment.