Skip to main content

Mountain/RunTime/Shutdown/
ShutdownCocoonWithRetry.rs

1
2//! Send `$shutdown` over gRPC to Cocoon (3 attempts), then SIGKILL the child
3//! regardless of gRPC outcome. The hard-kill (Atom I6) is critical: a gRPC
4//! failure (transport error, broken pipe) used to leave the child orphaned,
5//! holding port 50052, and the next Mountain launch hit EADDRINUSE with the
6//! extension host stuck in degraded mode.
7
8use std::sync::Arc;
9
10use CommonLibrary::{Environment::Requires::Requires, Error::CommonError::CommonError, IPC::IPCProvider::IPCProvider};
11
12use crate::{RunTime::ApplicationRunTime::ApplicationRunTime, dev_log};
13
14impl ApplicationRunTime {
15	pub async fn ShutdownCocoonWithRetry(&self) -> Result<(), CommonError> {
16		let IPCProvider:Arc<dyn IPCProvider> = self.Environment.Require();
17
18		let MaximumAttempts = 3;
19
20		let mut Attempts = 0;
21
22		let mut GracefulOk = false;
23
24		let mut LastError:Option<CommonError> = None;
25
26		while Attempts < MaximumAttempts {
27			match IPCProvider
28				.SendNotificationToSideCar("cocoon-main".to_string(), "$shutdown".to_string(), serde_json::Value::Null)
29				.await
30			{
31				Ok(()) => {
32					tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
33
34					GracefulOk = true;
35
36					break;
37				},
38
39				Err(Error) => {
40					Attempts += 1;
41
42					LastError = Some(Error.clone());
43
44					if Attempts < MaximumAttempts {
45						dev_log!(
46							"lifecycle",
47							"warn: [ApplicationRunTime] Cocoon shutdown attempt {} failed: {}. Retrying...",
48							Attempts,
49							Error
50						);
51
52						tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await;
53					}
54				},
55			}
56		}
57
58		// Mark the Vine gRPC client shutting down BEFORE the SIGKILL so any
59		// background tokio task firing `SendNotification` after this flips
60		// short-circuits to `Ok(())` instead of attempting a TCP connect to
61		// the dead socket and logging a false-positive `Connection refused`.
62		crate::Vine::Client::MarkShutdown::Fn();
63
64		// Atom I6: always reap the child after the graceful attempt. No-op if
65		// the child already exited from $shutdown.
66		crate::ProcessManagement::CocoonManagement::HardKillCocoon().await;
67
68		if GracefulOk {
69			Ok(())
70		} else {
71			Err(LastError.unwrap_or_else(|| {
72				CommonError::Unknown { Description:"Failed to shutdown Cocoon after maximum retries".to_string() }
73			}))
74		}
75	}
76}