Add health-check command to garage CLI

This command is used to check the local node health.

Related to https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/1354
This commit is contained in:
Paul FLORENCE 2026-03-05 13:46:17 +01:00 committed by Alex Auvolat
parent f7be222471
commit 9fa4e03748
4 changed files with 53 additions and 2 deletions

View File

@ -9,6 +9,39 @@ use crate::cli::remote::*;
use crate::cli::structs::*;
impl Cli {
pub async fn cmd_health_check(&self, quiet: bool) -> Result<(), Error> {
let status = self.api_request(GetClusterStatusRequest).await?;
let mut result = Err(Error::Message(
"Failed to find node id in configuration".into(),
));
if let Some(config) = &self.config {
let local_node_id = hex::encode(
garage_rpc::system::read_node_id(&config.metadata_dir)
.err_context(crate::cli::local::init::READ_KEY_ERROR)?
).to_string();
result = Err(Error::Message("Cluster is not healthy".into()));
for node in status.nodes.iter() {
if node.id == local_node_id {
result = Ok(())
}
}
if !quiet {
match result {
Ok(_) => {
println!("Healthy");
}
Err(_) => {
println!("Not healthy");
}
}
}
}
result
}
pub async fn cmd_status(&self) -> Result<(), Error> {
let status = self.api_request(GetClusterStatusRequest).await?;
let layout = self.api_request(GetClusterLayoutRequest).await?;

View File

@ -18,21 +18,23 @@ use garage_util::error::*;
use garage_rpc::*;
use crate::cli::structs::*;
use garage_api_admin::api::*;
use garage_api_admin::api_server::{AdminRpc as ProxyRpc, AdminRpcResponse as ProxyRpcResponse};
use garage_api_admin::RequestHandler;
use crate::cli::structs::*;
use garage_util::config::Config;
/// State shared by the remote CLI commands implemented in `impl Cli`.
pub struct Cli {
/// Endpoint for `ProxyRpc` messages (`ProxyRpc` is the local alias of
/// `garage_api_admin::api_server::AdminRpc`).
pub proxy_rpc_endpoint: Arc<Endpoint<ProxyRpc, ()>>,
/// Id of a Garage node.
// NOTE(review): presumably the node that CLI requests are sent to; confirm
// against the code that uses this field.
pub rpc_host: NodeID,
/// Configuration, when one is available. `cmd_health_check` reads
/// `metadata_dir` from it to find the local node id and fails when it is `None`.
pub config: Option<Config>,
}
impl Cli {
pub async fn handle(&self, cmd: Command) -> Result<(), Error> {
match cmd {
Command::Status => self.cmd_status().await,
Command::HealthCheck(opt) => self.cmd_health_check(opt.quiet).await,
Command::Node(NodeOperation::Connect(connect_opt)) => {
self.cmd_connect(connect_opt).await
}

View File

@ -14,6 +14,10 @@ pub enum Command {
#[structopt(name = "status", version = garage_version())]
Status,
/// Check the local node health and set the exit code to 1 if it is unhealthy.
#[structopt(name = "health-check", version = garage_version())]
HealthCheck(HealthCheckOpt),
/// Operations on individual Garage nodes
#[structopt(name = "node", version = garage_version())]
Node(NodeOperation),
@ -103,6 +107,17 @@ pub struct ServerOpt {
pub(crate) default_bucket: bool,
}
// -------------------------
// ---- garage health-check ... ----
// -------------------------
// Command-line options of the `health-check` subcommand.
// (Plain comment on purpose: a `///` comment here would be picked up by
// StructOpt as help text.)
#[derive(StructOpt, Debug)]
pub struct HealthCheckOpt {
    /// Do not print healthiness to stdout
    #[structopt(short = "q", long = "quiet")]
    pub(crate) quiet: bool,
}
// -------------------------
// ---- garage node ... ----
// -------------------------

View File

@ -351,6 +351,7 @@ async fn cli_command(opt: Opt) -> Result<(), Error> {
let cli = cli::remote::Cli {
proxy_rpc_endpoint,
rpc_host: id,
config: config,
};
cli.handle(opt.cmd).await