Skip to content

Commit 64bef6f

Browse files
committed
[support bundle] Add health checks to support bundles
1 parent f2aae61 commit 64bef6f

File tree

6 files changed

+70
-1
lines changed

6 files changed

+70
-1
lines changed

nexus/src/app/background/tasks/support_bundle_collector.rs

+6
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,12 @@ impl BundleCollection<'_> {
662662
sled_client.support_zpool_info(),
663663
)
664664
.boxed(),
665+
save_diag_cmd_output_or_error(
666+
&sled_path,
667+
"health-check",
668+
sled_client.support_health_check(),
669+
)
670+
.boxed(),
665671
])
666672
// Currently we execute up to 10 commands concurrently which
667673
// might be doing their own concurrent work, for example

openapi/sled-agent.json

+27
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,33 @@
659659
}
660660
}
661661
},
662+
"/support/health-check": {
663+
"get": {
664+
"operationId": "support_health_check",
665+
"responses": {
666+
"200": {
667+
"description": "successful operation",
668+
"content": {
669+
"application/json": {
670+
"schema": {
671+
"title": "Array_of_SledDiagnosticsQueryOutput",
672+
"type": "array",
673+
"items": {
674+
"$ref": "#/components/schemas/SledDiagnosticsQueryOutput"
675+
}
676+
}
677+
}
678+
}
679+
},
680+
"4XX": {
681+
"$ref": "#/components/responses/Error"
682+
},
683+
"5XX": {
684+
"$ref": "#/components/responses/Error"
685+
}
686+
}
687+
}
688+
},
662689
"/support/ipadm-info": {
663690
"get": {
664691
"operationId": "support_ipadm_info",

sled-agent/api/src/lib.rs

+8
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,14 @@ pub trait SledAgentApi {
673673
request_context: RequestContext<Self::Context>,
674674
) -> Result<HttpResponseOk<SledDiagnosticsQueryOutput>, HttpError>;
675675

676+
// Endpoint declaration: `GET /support/health-check`.
//
// On success the response body is a `Vec<SledDiagnosticsQueryOutput>`;
// failures of the request itself are reported as `HttpError`.
// NOTE(review): presumably one entry per health-check command that was run --
// confirm against the `sled_diagnostics` implementation.
// NOTE(review): written as plain `//` comments on purpose; if these are
// promoted to `///`, check whether the generated OpenAPI document needs to be
// regenerated -- TODO confirm.
#[endpoint {
677+
method = GET,
678+
path = "/support/health-check",
679+
}]
680+
async fn support_health_check(
681+
request_context: RequestContext<Self::Context>,
682+
) -> Result<HttpResponseOk<Vec<SledDiagnosticsQueryOutput>>, HttpError>;
683+
676684
/// This endpoint returns a list of known zones on a sled that have service
677685
/// logs that can be collected into a support bundle.
678686
#[endpoint {

sled-agent/src/http_entrypoints.rs

+14
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,20 @@ impl SledAgentApi for SledAgentImpl {
10361036
Ok(HttpResponseOk(res.get_output()))
10371037
}
10381038

1039+
// HTTP handler for the health-check support endpoint.
//
// Fetches the handler context from the request, awaits its
// `support_health_check()` method, and converts every returned item with
// `get_output()` before wrapping the collected vector in `HttpResponseOk`.
// The only value this body ever returns is `Ok(..)`: individual items are
// converted in place rather than propagated with `?`.
async fn support_health_check(
1040+
request_context: RequestContext<Self::Context>,
1041+
) -> Result<HttpResponseOk<Vec<SledDiagnosticsQueryOutput>>, HttpError>
1042+
{
1043+
let sa = request_context.context();
1044+
Ok(HttpResponseOk(
1045+
sa.support_health_check()
1046+
.await
1047+
.into_iter()
1048+
// NOTE(review): `get_output()` is presumably what folds a failed command
// into a `SledDiagnosticsQueryOutput` -- confirm against its definition.
.map(|cmd| cmd.get_output())
1049+
.collect::<Vec<_>>(),
1050+
))
1051+
}
1052+
10391053
async fn support_logs(
10401054
request_context: RequestContext<Self::Context>,
10411055
) -> Result<HttpResponseOk<Vec<String>>, HttpError> {

sled-agent/src/sim/http_entrypoints.rs

+7
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,13 @@ impl SledAgentApi for SledAgentSimImpl {
745745
method_unimplemented()
746746
}
747747

748+
// Health-check support endpoint for `SledAgentSimImpl`.
//
// Not implemented here: the request context is ignored (hence the leading
// underscore) and the result of `method_unimplemented()` is returned as-is.
async fn support_health_check(
749+
_request_context: RequestContext<Self::Context>,
750+
) -> Result<HttpResponseOk<Vec<SledDiagnosticsQueryOutput>>, HttpError>
751+
{
752+
method_unimplemented()
753+
}
754+
748755
async fn support_logs(
749756
_request_context: RequestContext<Self::Context>,
750757
) -> Result<HttpResponseOk<Vec<String>>, HttpError> {

sled-agent/src/sled_agent.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ use sled_agent_types::zone_bundle::{
6767
BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod,
6868
PriorityOrder, StorageLimit, ZoneBundleMetadata,
6969
};
70-
use sled_diagnostics::{SledDiagnosticsCmdError, SledDiagnosticsCmdOutput};
70+
use sled_diagnostics::SledDiagnosticsCmdError;
71+
use sled_diagnostics::SledDiagnosticsCmdOutput;
7172
use sled_hardware::{HardwareManager, MemoryReservations, underlay};
7273
use sled_hardware_types::Baseboard;
7374
use sled_hardware_types::underlay::BootstrapInterface;
@@ -1474,6 +1475,12 @@ impl SledAgent {
14741475
) -> Result<SledDiagnosticsCmdOutput, SledDiagnosticsCmdError> {
14751476
sled_diagnostics::zpool_info().await
14761477
}
1478+
1479+
/// Runs the sled health-check diagnostics.
///
/// Awaits `sled_diagnostics::health_check()` and returns its result
/// unchanged: a vector whose entries are each independently either a
/// `SledDiagnosticsCmdOutput` or a `SledDiagnosticsCmdError`. No state from
/// `self` is read.
pub(crate) async fn support_health_check(
1480+
&self,
1481+
) -> Vec<Result<SledDiagnosticsCmdOutput, SledDiagnosticsCmdError>> {
1482+
sled_diagnostics::health_check().await
1483+
}
14771484
}
14781485

14791486
#[derive(From, thiserror::Error, Debug)]

0 commit comments

Comments
 (0)