From aa794a2b82f504dfc70d21066017fbf404a21dc6 Mon Sep 17 00:00:00 2001 From: chesedo Date: Thu, 15 Dec 2022 11:39:38 +0200 Subject: [PATCH 1/6] refactor: connect to user network on startup --- gateway/src/project.rs | 72 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 13 deletions(-) diff --git a/gateway/src/project.rs b/gateway/src/project.rs index e77f4a62e..b312dc97b 100644 --- a/gateway/src/project.rs +++ b/gateway/src/project.rs @@ -8,6 +8,8 @@ use bollard::container::{ }; use bollard::errors::Error as DockerError; use bollard::models::{ContainerInspectResponse, ContainerStateStatusEnum}; +use bollard::network::{ConnectNetworkOptions, DisconnectNetworkOptions}; +use bollard::service::EndpointSettings; use bollard::system::EventsOptions; use fqdn::FQDN; use futures::prelude::*; @@ -19,7 +21,7 @@ use once_cell::sync::Lazy; use rand::distributions::{Alphanumeric, DistString}; use serde::{Deserialize, Serialize}; use tokio::time::{self, timeout}; -use tracing::{debug, error, instrument}; +use tracing::{debug, error, info, instrument}; use crate::{ ContainerSettings, DockerContext, EndState, Error, ErrorKind, IntoTryState, ProjectName, @@ -300,6 +302,11 @@ where if let Ok(Self::Errored(errored)) = &mut new { errored.ctx = Some(Box::new(previous)); error!(error = ?errored, "state for project errored"); + + if errored.kind == ProjectErrorKind::NoNetwork { + // Restart the container to try and connect to the network again + new = Ok(errored.ctx.clone().unwrap().stop().unwrap()); + } } let new_state = new.as_ref().unwrap().state(); @@ -463,8 +470,6 @@ impl ProjectCreating { image: default_image, prefix, provisioner_host, - network_name, - network_id, fqdn: public, .. } = ctx.container_settings(); @@ -521,14 +526,6 @@ impl ProjectCreating { let mut config = Config::::from(container_config); - config.networking_config = deserialize_json!({ - "EndpointsConfig": { - network_name: { - "NetworkID": network_id - } - } - }); - config.host_config = deserialize_json!({ "Mounts": [{ "Target": "/opt/shuttle", @@ -602,6 +599,12 @@ where #[instrument(skip_all)] async fn next(self, ctx: &Ctx) -> Result { let container_id = self.container.id.as_ref().unwrap(); + let ContainerSettings { + network_name, + network_id, + .. + } = ctx.container_settings(); + ctx.docker() .start_container::(container_id, None) .await @@ -614,6 +617,40 @@ where } })?; + // Make sure the container is connected to the user network only + let network_config = ConnectNetworkOptions { + container: container_id, + endpoint_config: EndpointSettings { + network_id: Some(network_id.to_string()), + ..Default::default() + }, + }; + + ctx.docker().connect_network(network_name, network_config) + .await + .or_else(|err| { + if matches!(err, DockerError::DockerResponseServerError { status_code, .. } if status_code == 403) { + info!("already connected to the shuttle network"); + Ok(()) + } else { + Err(err) + } + })?; + + ctx.docker().disconnect_network("bridge", DisconnectNetworkOptions{ + container: container_id, + force: true, + }) + .await + .or_else(|err| { + if matches!(err, DockerError::DockerResponseServerError { status_code, .. } if status_code == 500) { + info!("already disconnected from the bridge network"); + Ok(()) + } else { + Err(err) + } + })?; + let container = self.container.refresh(ctx).await?; Ok(Self::Next::new(container)) @@ -748,11 +785,11 @@ impl Service { let network = safe_unwrap!(container.network_settings.networks) .values() .next() - .ok_or_else(|| ProjectError::internal("project was not linked to a network"))?; + .ok_or_else(|| ProjectError::no_network("project was not linked to a network"))?; let target = safe_unwrap!(network.ip_address) .parse() - .map_err(|_| ProjectError::internal("project did not join the network"))?; + .map_err(|_| ProjectError::no_network("project did not join the network"))?; Ok(Self { name: resource_name, @@ -916,6 +953,7 @@ where #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub enum ProjectErrorKind { Internal, + NoNetwork, } /// A runtime error coming from inside a project @@ -934,6 +972,14 @@ impl ProjectError { ctx: None, } } + + pub fn no_network>(message: S) -> Self { + Self { + kind: ProjectErrorKind::NoNetwork, + message: message.as_ref().to_string(), + ctx: None, + } + } } impl std::fmt::Display for ProjectError { From f2c496eaa76e23760063643f573c554a9988aead Mon Sep 17 00:00:00 2001 From: chesedo Date: Thu, 15 Dec 2022 11:42:48 +0200 Subject: [PATCH 2/6] feat: revive containers not connected to the user network --- gateway/src/project.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gateway/src/project.rs b/gateway/src/project.rs index b312dc97b..3250bb596 100644 --- a/gateway/src/project.rs +++ b/gateway/src/project.rs @@ -1094,6 +1094,21 @@ pub mod exec { })) .send(&sender) .await; + } else if safe_unwrap!(container.network_settings.networks).is_empty() { + debug!( + "{} is not connected to a network so will be restarted", + project_name.clone() + ); + _ = gateway + .new_task() + .project(project_name) + .and_then(task::run(|ctx| async move { + TaskResult::Done(Project::Stopping(ProjectStopping { + container: ctx.state.container().unwrap(), + })) + })) + .send(&sender) + .await; } } } From c726b3e7872dc31fc4d7735992cb1677718de323 Mon Sep 17 00:00:00 2001 From: chesedo Date: Wed, 28 Dec 2022 11:15:42 +0200 Subject: [PATCH 3/6] refactor: attach network first --- gateway/src/project.rs | 51 ++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/gateway/src/project.rs b/gateway/src/project.rs index 3250bb596..b1f535f9d 100644 --- a/gateway/src/project.rs +++ b/gateway/src/project.rs @@ -598,26 +598,36 @@ where #[instrument(skip_all)] async fn next(self, ctx: &Ctx) -> Result { - let container_id = self.container.id.as_ref().unwrap(); + let Self { container } = self; + + let container_id = container.id.as_ref().unwrap(); let ContainerSettings { network_name, network_id, .. } = ctx.container_settings(); - ctx.docker() - .start_container::(container_id, None) + // Disconnect the bridge network before trying to start up + // For docker bug https://github.com/docker/cli/issues/1891 + // + // Also disconnecting from all network because docker just losses track of their IDs sometimes when restarting + for network in safe_unwrap!(container.network_settings.networks).keys() { + ctx.docker().disconnect_network(network, DisconnectNetworkOptions{ + container: container_id, + force: true, + }) .await .or_else(|err| { - if matches!(err, DockerError::DockerResponseServerError { status_code, .. } if status_code == 304) { - // Already started + if matches!(err, DockerError::DockerResponseServerError { status_code, .. } if status_code == 500) { + info!("already disconnected from the {network} network"); Ok(()) } else { Err(err) } })?; + } - // Make sure the container is connected to the user network only + // Make sure the container is connected to the user network let network_config = ConnectNetworkOptions { container: container_id, endpoint_config: EndpointSettings { @@ -625,33 +635,40 @@ where ..Default::default() }, }; - - ctx.docker().connect_network(network_name, network_config) + ctx.docker() + .connect_network(network_name, network_config) .await .or_else(|err| { - if matches!(err, DockerError::DockerResponseServerError { status_code, .. } if status_code == 403) { + if matches!( + err, + DockerError::DockerResponseServerError { status_code, .. } if status_code == 409 + ) { info!("already connected to the shuttle network"); Ok(()) } else { - Err(err) + error!( + error = &err as &dyn std::error::Error, + "failed to connect to shuttle network" + ); + Err(ProjectError::no_network( + "failed to connect to shuttle network", + )) } })?; - ctx.docker().disconnect_network("bridge", DisconnectNetworkOptions{ - container: container_id, - force: true, - }) + ctx.docker() + .start_container::(container_id, None) .await .or_else(|err| { - if matches!(err, DockerError::DockerResponseServerError { status_code, .. } if status_code == 500) { - info!("already disconnected from the bridge network"); + if matches!(err, DockerError::DockerResponseServerError { status_code, .. } if status_code == 304) { + // Already started Ok(()) } else { Err(err) } })?; - let container = self.container.refresh(ctx).await?; + let container = container.refresh(ctx).await?; Ok(Self::Next::new(container)) } From d2eacfd71ff21ff479fa22dababce5e0ba65085d Mon Sep 17 00:00:00 2001 From: chesedo Date: Wed, 28 Dec 2022 11:15:55 +0200 Subject: [PATCH 4/6] refactor: revive created containers --- gateway/src/project.rs | 68 ++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/gateway/src/project.rs b/gateway/src/project.rs index b1f535f9d..95545131d 100644 --- a/gateway/src/project.rs +++ b/gateway/src/project.rs @@ -1095,37 +1095,47 @@ pub mod exec { .inspect_container(safe_unwrap!(container.id), None) .await { - if let Some(ContainerState { - status: Some(ContainerStateStatusEnum::EXITED), - .. - }) = container.state - { - debug!("{} will be revived", project_name.clone()); - _ = gateway - .new_task() - .project(project_name) - .and_then(task::run(|ctx| async move { - TaskResult::Done(Project::Stopped(ProjectStopped { - container: ctx.state.container().unwrap(), + match container.state { + Some(ContainerState { + status: Some(ContainerStateStatusEnum::EXITED), + .. + }) => { + debug!("{} will be revived", project_name.clone()); + _ = gateway + .new_task() + .project(project_name) + .and_then(task::run(|ctx| async move { + TaskResult::Done(Project::Stopped(ProjectStopped { + container: ctx.state.container().unwrap(), + })) })) - })) - .send(&sender) - .await; - } else if safe_unwrap!(container.network_settings.networks).is_empty() { - debug!( - "{} is not connected to a network so will be restarted", - project_name.clone() - ); - _ = gateway - .new_task() - .project(project_name) - .and_then(task::run(|ctx| async move { - TaskResult::Done(Project::Stopping(ProjectStopping { - container: ctx.state.container().unwrap(), + .send(&sender) + .await; + } + Some(ContainerState { + status: Some(ContainerStateStatusEnum::RUNNING), + .. + }) + | Some(ContainerState { + status: Some(ContainerStateStatusEnum::CREATED), + .. + }) => { + debug!( + "{} is errored but ready according to docker. So restarting it", + project_name.clone() + ); + _ = gateway + .new_task() + .project(project_name) + .and_then(task::run(|ctx| async move { + TaskResult::Done(Project::Stopping(ProjectStopping { + container: ctx.state.container().unwrap(), + })) })) - })) - .send(&sender) - .await; + .send(&sender) + .await; + } + _ => {} } } } From 84d83f7dede14f6a2e1d7226073d1b98fae9102d Mon Sep 17 00:00:00 2001 From: chesedo Date: Wed, 28 Dec 2022 13:43:35 +0200 Subject: [PATCH 5/6] feat: attaching state --- common/src/models/project.rs | 9 ++-- gateway/src/project.rs | 82 ++++++++++++++++++++++++++++++------ 2 files changed, 74 insertions(+), 17 deletions(-) diff --git a/common/src/models/project.rs b/common/src/models/project.rs index 171c4e46a..a5dc9d1ef 100644 --- a/common/src/models/project.rs +++ b/common/src/models/project.rs @@ -15,6 +15,7 @@ pub struct Response { #[strum(serialize_all = "lowercase")] pub enum State { Creating, + Attaching, Starting, Started, Ready, @@ -39,10 +40,10 @@ impl Display for Response { impl State { pub fn get_color(&self) -> Color { match self { - State::Creating | State::Starting | State::Started => Color::Cyan, - State::Ready => Color::Green, - State::Stopped | State::Stopping | State::Destroying | State::Destroyed => Color::Blue, - State::Errored => Color::Red, + Self::Creating | Self::Attaching | Self::Starting | Self::Started => Color::Cyan, + Self::Ready => Color::Green, + Self::Stopped | Self::Stopping | Self::Destroying | Self::Destroyed => Color::Blue, + Self::Errored => Color::Red, } } } diff --git a/gateway/src/project.rs b/gateway/src/project.rs index 95545131d..1364f5ed8 100644 --- a/gateway/src/project.rs +++ b/gateway/src/project.rs @@ -148,6 +148,7 @@ impl From for Error { #[serde(rename_all = "lowercase")] pub enum Project { Creating(ProjectCreating), + Attaching(ProjectAttaching), Starting(ProjectStarting), Started(ProjectStarted), Ready(ProjectReady), @@ -160,6 +161,7 @@ pub enum Project { impl_from_variant!(Project: ProjectCreating => Creating, + ProjectAttaching => Attaching, ProjectStarting => Starting, ProjectStarted => Started, ProjectReady => Ready, @@ -222,6 +224,7 @@ impl Project { Self::Starting(_) => "starting", Self::Stopping(_) => "stopping", Self::Creating(_) => "creating", + Self::Attaching(_) => "attaching", Self::Destroying(_) => "destroying", Self::Destroyed(_) => "destroyed", Self::Errored(_) => "error", @@ -232,6 +235,7 @@ impl Project { match self { Self::Starting(ProjectStarting { container, .. }) | Self::Started(ProjectStarted { container, .. }) + | Self::Attaching(ProjectAttaching { container, .. }) | Self::Ready(ProjectReady { container, .. }) | Self::Stopping(ProjectStopping { container }) | Self::Stopped(ProjectStopped { container }) @@ -258,6 +262,7 @@ impl From for shuttle_common::models::project::State { fn from(project: Project) -> Self { match project { Project::Creating(_) => Self::Creating, + Project::Attaching(_) => Self::Attaching, Project::Starting(_) => Self::Starting, Project::Started(_) => Self::Started, Project::Ready(_) => Self::Ready, @@ -285,6 +290,17 @@ where let mut new = match self { Self::Creating(creating) => creating.next(ctx).await.into_try_state(), + Self::Attaching(attaching) => match attaching.next(ctx).await { + Err(ProjectError { + kind: ProjectErrorKind::NoNetwork, + ctx, + .. + }) => { + // Restart the container to try and connect to the network again + Ok(ctx.unwrap().stop().unwrap()) + } + attaching => attaching.into_try_state(), + }, Self::Starting(ready) => ready.next(ctx).await.into_try_state(), Self::Started(started) => match started.next(ctx).await { Ok(ProjectReadying::Ready(ready)) => Ok(ready.into()), @@ -302,11 +318,6 @@ where if let Ok(Self::Errored(errored)) = &mut new { errored.ctx = Some(Box::new(previous)); error!(error = ?errored, "state for project errored"); - - if errored.kind == ProjectErrorKind::NoNetwork { - // Restart the container to try and connect to the network again - new = Ok(errored.ctx.clone().unwrap().stop().unwrap()); - } } let new_state = new.as_ref().unwrap().state(); @@ -357,6 +368,7 @@ where async fn refresh(self, ctx: &Ctx) -> Result { let refreshed = match self { Self::Creating(creating) => Self::Creating(creating), + Self::Attaching(attaching) => Self::Attaching(attaching), Self::Starting(ProjectStarting { container }) | Self::Started(ProjectStarted { container, .. }) | Self::Ready(ProjectReady { container, .. }) @@ -556,7 +568,7 @@ impl State for ProjectCreating where Ctx: DockerContext, { - type Next = ProjectStarting; + type Next = ProjectAttaching; type Error = ProjectError; #[instrument(skip_all)] @@ -579,21 +591,21 @@ where } }) .await?; - Ok(ProjectStarting { container }) + Ok(ProjectAttaching { container }) } } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct ProjectStarting { +pub struct ProjectAttaching { container: ContainerInspectResponse, } #[async_trait] -impl State for ProjectStarting +impl State for ProjectAttaching where Ctx: DockerContext, { - type Next = ProjectStarted; + type Next = ProjectStarting; type Error = ProjectError; #[instrument(skip_all)] @@ -656,6 +668,31 @@ where } })?; + let container = container.refresh(ctx).await?; + + Ok(ProjectStarting { container }) + } +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct ProjectStarting { + container: ContainerInspectResponse, +} + +#[async_trait] +impl State for ProjectStarting +where + Ctx: DockerContext, +{ + type Next = ProjectStarted; + type Error = ProjectError; + + #[instrument(skip_all)] + async fn next(self, ctx: &Ctx) -> Result { + let Self { container } = self; + + let container_id = container.id.as_ref().unwrap(); + ctx.docker() .start_container::(container_id, None) .await @@ -806,7 +843,7 @@ impl Service { let target = safe_unwrap!(network.ip_address) .parse() - .map_err(|_| ProjectError::no_network("project did not join the network"))?; + .map_err(|_| ProjectError::internal("project did not join the network"))?; Ok(Self { name: resource_name, @@ -1150,6 +1187,7 @@ pub mod exec { pub mod tests { use bollard::models::ContainerState; + use bollard::service::NetworkSettings; use futures::prelude::*; use hyper::{Body, Request, StatusCode}; @@ -1172,7 +1210,21 @@ pub mod tests { image: None, from: None, }), - #[assertion = "Container created, assigned an `id`"] + #[assertion = "Container created, attach network"] + Ok(Project::Attaching(ProjectAttaching { + container: ContainerInspectResponse { + state: Some(ContainerState { + status: Some(ContainerStateStatusEnum::CREATED), + .. + }), + network_settings: Some(NetworkSettings { + networks: Some(networks), + .. + }), + .. + } + })) if networks.keys().collect::>() == vec!["bridge"], + #[assertion = "Container attached, assigned an `id`"] Ok(Project::Starting(ProjectStarting { container: ContainerInspectResponse { id: Some(container_id), @@ -1180,9 +1232,13 @@ pub mod tests { status: Some(ContainerStateStatusEnum::CREATED), .. }), + network_settings: Some(NetworkSettings { + networks: Some(networks), + .. + }), .. } - })), + })) if networks.keys().collect::>() == vec![&ctx.container_settings.network_name], #[assertion = "Container started, in a running state"] Ok(Project::Started(ProjectStarted { container: ContainerInspectResponse { From 96d3b138f722f9541cdd46bd4f0e6be8693d974d Mon Sep 17 00:00:00 2001 From: chesedo Date: Wed, 28 Dec 2022 13:49:43 +0200 Subject: [PATCH 6/6] refactor: resetting some code --- gateway/src/project.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/gateway/src/project.rs b/gateway/src/project.rs index 1364f5ed8..11bb841fe 100644 --- a/gateway/src/project.rs +++ b/gateway/src/project.rs @@ -689,9 +689,7 @@ where #[instrument(skip_all)] async fn next(self, ctx: &Ctx) -> Result { - let Self { container } = self; - - let container_id = container.id.as_ref().unwrap(); + let container_id = self.container.id.as_ref().unwrap(); ctx.docker() .start_container::(container_id, None) @@ -705,7 +703,7 @@ where } })?; - let container = container.refresh(ctx).await?; + let container = self.container.refresh(ctx).await?; Ok(Self::Next::new(container)) } @@ -839,7 +837,7 @@ impl Service { let network = safe_unwrap!(container.network_settings.networks) .values() .next() - .ok_or_else(|| ProjectError::no_network("project was not linked to a network"))?; + .ok_or_else(|| ProjectError::internal("project was not linked to a network"))?; let target = safe_unwrap!(network.ip_address) .parse()