diff --git a/cmd/metrics-v3-cluster-iam.go b/cmd/metrics-v3-cluster-iam.go
new file mode 100644
index 000000000..6689fe558
--- /dev/null
+++ b/cmd/metrics-v3-cluster-iam.go
@@ -0,0 +1,87 @@
+// Copyright (c) 2024 MinIO, Inc.
+//
+// This file is part of MinIO Object Storage stack
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package cmd
+
+import (
+	"context"
+	"sync/atomic"
+	"time"
+)
+
+// Metric names exposed by the cluster IAM metrics group.
+const (
+	lastSyncDurationMillis                 = "last_sync_duration_millis"
+	pluginAuthnServiceFailedRequestsMinute = "plugin_authn_service_failed_requests_minute"
+	pluginAuthnServiceLastFailSeconds      = "plugin_authn_service_last_fail_seconds"
+	pluginAuthnServiceLastSuccSeconds      = "plugin_authn_service_last_succ_seconds"
+	pluginAuthnServiceSuccAvgRttMsMinute   = "plugin_authn_service_succ_avg_rtt_ms_minute"
+	pluginAuthnServiceSuccMaxRttMsMinute   = "plugin_authn_service_succ_max_rtt_ms_minute"
+	pluginAuthnServiceTotalRequestsMinute  = "plugin_authn_service_total_requests_minute"
+	sinceLastSyncMillis                    = "since_last_sync_millis"
+	syncFailures                           = "sync_failures"
+	syncSuccesses                          = "sync_successes"
+)
+
+// Descriptors for the cluster IAM metrics. Values that can rise or fall
+// between scrapes (durations, ages, last-minute windows) are gauges; only the
+// sync totals, which are described as counts since server start, are counters.
+var (
+	lastSyncDurationMillisMD                 = NewGaugeMD(lastSyncDurationMillis, "Last successful IAM data sync duration in milliseconds")
+	pluginAuthnServiceFailedRequestsMinuteMD = NewGaugeMD(pluginAuthnServiceFailedRequestsMinute, "When plugin authentication is configured, returns failed requests count in the last full minute")
+	pluginAuthnServiceLastFailSecondsMD      = NewGaugeMD(pluginAuthnServiceLastFailSeconds, "When plugin authentication is configured, returns time (in seconds) since the last failed request to the service")
+	pluginAuthnServiceLastSuccSecondsMD      = NewGaugeMD(pluginAuthnServiceLastSuccSeconds, "When plugin authentication is configured, returns time (in seconds) since the last successful request to the service")
+	pluginAuthnServiceSuccAvgRttMsMinuteMD   = NewGaugeMD(pluginAuthnServiceSuccAvgRttMsMinute, "When plugin authentication is configured, returns average round-trip-time of successful requests in the last full minute")
+	pluginAuthnServiceSuccMaxRttMsMinuteMD   = NewGaugeMD(pluginAuthnServiceSuccMaxRttMsMinute, "When plugin authentication is configured, returns maximum round-trip-time of successful requests in the last full minute")
+	pluginAuthnServiceTotalRequestsMinuteMD  = NewGaugeMD(pluginAuthnServiceTotalRequestsMinute, "When plugin authentication is configured, returns total requests count in the last full minute")
+	sinceLastSyncMillisMD                    = NewGaugeMD(sinceLastSyncMillis, "Time (in milliseconds) since last successful IAM data sync.")
+	syncFailuresMD                           = NewCounterMD(syncFailures, "Number of failed IAM data syncs since server start.")
+	syncSuccessesMD                          = NewCounterMD(syncSuccesses, "Number of successful IAM data syncs since server start.")
+)
+
+// loadClusterIAMMetrics - `MetricsLoaderFn` for cluster IAM metrics.
+//
+// It copies the IAM refresh statistics held by globalIAMSys and the metrics
+// reported by globalAuthNPlugin into m. The since-last-sync value is left
+// unset while no refresh time has been recorded (the stored time is zero).
+func loadClusterIAMMetrics(_ context.Context, m MetricValues, _ *metricsCache) error {
+	m.Set(lastSyncDurationMillis, float64(atomic.LoadUint64(&globalIAMSys.LastRefreshDurationMilliseconds)))
+
+	pluginAuthNMetrics := globalAuthNPlugin.Metrics()
+	m.Set(pluginAuthnServiceFailedRequestsMinute, float64(pluginAuthNMetrics.FailedRequests))
+	m.Set(pluginAuthnServiceLastFailSeconds, pluginAuthNMetrics.LastUnreachableSecs)
+	m.Set(pluginAuthnServiceLastSuccSeconds, pluginAuthNMetrics.LastReachableSecs)
+	m.Set(pluginAuthnServiceSuccAvgRttMsMinute, pluginAuthNMetrics.AvgSuccRTTMs)
+	m.Set(pluginAuthnServiceSuccMaxRttMsMinute, pluginAuthNMetrics.MaxSuccRTTMs)
+	m.Set(pluginAuthnServiceTotalRequestsMinute, float64(pluginAuthNMetrics.TotalRequests))
+
+	lastSyncTime := atomic.LoadUint64(&globalIAMSys.LastRefreshTimeUnixNano)
+	if lastSyncTime != 0 {
+		// time.Now().UnixNano() is a wall-clock reading and can be earlier
+		// than the stored refresh time if the clock stepped backwards; clamp
+		// to zero instead of letting the unsigned subtraction wrap around.
+		var sinceNanos uint64
+		if now := uint64(time.Now().UnixNano()); now > lastSyncTime {
+			sinceNanos = now - lastSyncTime
+		}
+		m.Set(sinceLastSyncMillis, float64(sinceNanos/uint64(time.Millisecond)))
+	}
+
+	m.Set(syncFailures, float64(atomic.LoadUint64(&globalIAMSys.TotalRefreshFailures)))
+	m.Set(syncSuccesses, float64(atomic.LoadUint64(&globalIAMSys.TotalRefreshSuccesses)))
+	return nil
+}
diff --git a/cmd/metrics-v3.go b/cmd/metrics-v3.go
index 38c30dd21..3711c0f73 100644
--- a/cmd/metrics-v3.go
+++ b/cmd/metrics-v3.go
@@ -46,6 +46,7 @@ const (
 	clusterErasureSetCollectorPath   collectorPath = "/cluster/erasure-set"
 	clusterAuditCollectorPath        collectorPath = "/cluster/audit"
 	clusterNotificationCollectorPath collectorPath = "/cluster/notification"
+	clusterIAMCollectorPath          collectorPath = "/cluster/iam"
 )
 
 const (
@@ -278,6 +279,22 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
 		loadClusterNotificationMetrics,
 	)
 
+	clusterIAMMG := NewMetricsGroup(clusterIAMCollectorPath,
+		[]MetricDescriptor{
+			lastSyncDurationMillisMD,
+			pluginAuthnServiceFailedRequestsMinuteMD,
+			pluginAuthnServiceLastFailSecondsMD,
+			pluginAuthnServiceLastSuccSecondsMD,
+			pluginAuthnServiceSuccAvgRttMsMinuteMD,
+			pluginAuthnServiceSuccMaxRttMsMinuteMD,
+			pluginAuthnServiceTotalRequestsMinuteMD,
+			sinceLastSyncMillisMD,
+			syncFailuresMD,
+			syncSuccessesMD,
+		},
+		loadClusterIAMMetrics,
+	)
+
 	allMetricGroups := []*MetricsGroup{
 		apiRequestsMG,
 		apiBucketMG,
@@ -294,6 +311,7 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
 		clusterErasureSetMG,
 		clusterAuditMG,
 		clusterNotificationMG,
+		clusterIAMMG,
 	}
 
 	// Bucket metrics are special, they always include the bucket label. These
diff --git a/docs/metrics/v3.md b/docs/metrics/v3.md
index e10bc947c..8d442d4bf 100644
--- a/docs/metrics/v3.md
+++ b/docs/metrics/v3.md
@@ -253,3 +253,18 @@ The standard metrics group for GoCollector is not shown below.
 | `minio_cluster_notification_events_errors_total` | `counter` | Events that were failed to be sent to the targets | |
 | `minio_cluster_notification_events_sent_total` | `counter` | Total number of events sent to the targets | |
 | `minio_cluster_notification_events_skipped_total` | `counter` | Events that were skipped to be sent to the targets due to the in-memory queue being full | |
+
+### `/cluster/iam`
+
+| Name                                                            | Type      | Help                                                                                                                     | Labels |
+|-----------------------------------------------------------------|-----------|--------------------------------------------------------------------------------------------------------------------------|--------|
+| `minio_cluster_iam_last_sync_duration_millis`                   | `gauge`   | Last successful IAM data sync duration in milliseconds                                                                   |        |
+| `minio_cluster_iam_plugin_authn_service_failed_requests_minute` | `gauge`   | When plugin authentication is configured, returns failed requests count in the last full minute                          |        |
+| `minio_cluster_iam_plugin_authn_service_last_fail_seconds`      | `gauge`   | When plugin authentication is configured, returns time (in seconds) since the last failed request to the service         |        |
+| `minio_cluster_iam_plugin_authn_service_last_succ_seconds`      | `gauge`   | When plugin authentication is configured, returns time (in seconds) since the last successful request to the service     |        |
+| `minio_cluster_iam_plugin_authn_service_succ_avg_rtt_ms_minute` | `gauge`   | When plugin authentication is configured, returns average round-trip-time of successful requests in the last full minute |        |
+| `minio_cluster_iam_plugin_authn_service_succ_max_rtt_ms_minute` | `gauge`   | When plugin authentication is configured, returns maximum round-trip-time of successful requests in the last full minute |        |
+| `minio_cluster_iam_plugin_authn_service_total_requests_minute`  | `gauge`   | When plugin authentication is configured, returns total requests count in the last full minute                           |        |
+| `minio_cluster_iam_since_last_sync_millis`                      | `gauge`   | Time (in milliseconds) since last successful IAM data sync                                                               |        |
+| `minio_cluster_iam_sync_failures`                               | `counter` | Number of failed IAM data syncs since server start                                                                       |        |
+| `minio_cluster_iam_sync_successes`                              | `counter` | Number of successful IAM data syncs since server start                                                                   |        |