diff --git a/config/config.go b/config/config.go index d32fcc33c9..7f7595dcdf 100644 --- a/config/config.go +++ b/config/config.go @@ -409,6 +409,9 @@ type GlobalConfig struct { // More than this label value length post metric-relabeling will cause the // scrape to fail. 0 means no limit. LabelValueLengthLimit uint `yaml:"label_value_length_limit,omitempty"` + // Keep no more than this many dropped targets per job. + // 0 means no limit. + KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"` } // SetDirectory joins any relative file paths with dir. @@ -514,6 +517,9 @@ type ScrapeConfig struct { // More than this many buckets in a native histogram will cause the scrape to // fail. NativeHistogramBucketLimit uint `yaml:"native_histogram_bucket_limit,omitempty"` + // Keep no more than this many dropped targets per job. + // 0 means no limit. + KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. @@ -608,6 +614,9 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error { if c.LabelValueLengthLimit == 0 { c.LabelValueLengthLimit = globalConfig.LabelValueLengthLimit } + if c.KeepDroppedTargets == 0 { + c.KeepDroppedTargets = globalConfig.KeepDroppedTargets + } return nil } diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 6691902579..f15a9f914d 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -106,6 +106,10 @@ global: # change in the future. [ target_limit: | default = 0 ] + # Limit per scrape config on the number of targets dropped by relabeling + # that will be kept in memory. 0 means no limit. + [ keep_dropped_targets: | default = 0 ] + # Rule files specifies a list of globs. Rules and alerts are read from # all matching files. rule_files: @@ -415,6 +419,10 @@ metric_relabel_configs: # change in the future. 
[ target_limit: | default = 0 ] +# Per-job limit on the number of targets dropped by relabeling +# that will be kept in memory. 0 means no limit. +[ keep_dropped_targets: | default = 0 ] + # Limit on total number of positive and negative buckets allowed in a single # native histogram. If this is exceeded, the entire scrape will be treated as # failed. 0 means no limit. diff --git a/docs/querying/api.md b/docs/querying/api.md index 8ddb834ef7..408d32cdab 100644 --- a/docs/querying/api.md +++ b/docs/querying/api.md @@ -543,6 +543,7 @@ GET /api/v1/targets ``` Both the active and dropped targets are part of the response by default. +Dropped targets are subject to `keep_dropped_targets` limit, if set. `labels` represents the label set after relabeling has occurred. `discoveredLabels` represent the unmodified labels retrieved during service discovery before relabeling has occurred. diff --git a/documentation/examples/prometheus-kubernetes.yml b/documentation/examples/prometheus-kubernetes.yml index 9a62287342..ad7451c2d7 100644 --- a/documentation/examples/prometheus-kubernetes.yml +++ b/documentation/examples/prometheus-kubernetes.yml @@ -8,6 +8,11 @@ # If you are using Kubernetes 1.7.2 or earlier, please take note of the comments # for the kubernetes-cadvisor job; you will need to edit or remove this job. +# Keep at most 100 sets of details of targets dropped by relabeling. +# This information is used to display in the UI for troubleshooting. +global: + keep_dropped_targets: 100 + # Scrape config for API servers. # # Kubernetes exposes API servers as endpoints to the default/kubernetes diff --git a/scrape/manager.go b/scrape/manager.go index d7cf6792c2..427b9f2be1 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -357,7 +357,7 @@ func (m *Manager) TargetsActive() map[string][]*Target { return targets } -// TargetsDropped returns the dropped targets during relabelling. 
+// TargetsDropped returns the dropped targets during relabelling, subject to KeepDroppedTargets limit. func (m *Manager) TargetsDropped() map[string][]*Target { m.mtxScrape.Lock() defer m.mtxScrape.Unlock() @@ -368,3 +368,15 @@ func (m *Manager) TargetsDropped() map[string][]*Target { } return targets } + +// TargetsDroppedCounts returns the total number of dropped targets per scrape pool, regardless of the KeepDroppedTargets limit. +func (m *Manager) TargetsDroppedCounts() map[string]int { + m.mtxScrape.Lock() + defer m.mtxScrape.Unlock() + + counts := make(map[string]int, len(m.scrapePools)) + for tset, sp := range m.scrapePools { + counts[tset] = sp.DroppedTargetsCount() + } + return counts +} diff --git a/scrape/scrape.go b/scrape/scrape.go index df729b4489..40836afc20 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -242,8 +242,9 @@ type scrapePool struct { targetMtx sync.Mutex // activeTargets and loops must always be synchronized to have the same // set of hashes. - activeTargets map[uint64]*Target - droppedTargets []*Target + activeTargets map[uint64]*Target + droppedTargets []*Target // Subject to KeepDroppedTargets limit. + droppedTargetsCount int // Count of all dropped targets. // Constructor for new scrape loops. This is settable for testing convenience. newLoop func(scrapeLoopOptions) loop @@ -354,12 +355,19 @@ func (sp *scrapePool) ActiveTargets() []*Target { return tActive } +// DroppedTargets returns the dropped targets, subject to the KeepDroppedTargets limit. func (sp *scrapePool) DroppedTargets() []*Target { sp.targetMtx.Lock() defer sp.targetMtx.Unlock() return sp.droppedTargets } +func (sp *scrapePool) DroppedTargetsCount() int { + sp.targetMtx.Lock() + defer sp.targetMtx.Unlock() + return sp.droppedTargetsCount +} + // stop terminates all scrape loops and returns after they all terminated. 
func (sp *scrapePool) stop() { sp.mtx.Lock() @@ -506,6 +514,7 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) { var targets []*Target lb := labels.NewBuilder(labels.EmptyLabels()) sp.droppedTargets = []*Target{} + sp.droppedTargetsCount = 0 for _, tg := range tgs { targets, failures := TargetsFromGroup(tg, sp.config, sp.noDefaultPort, targets, lb) for _, err := range failures { @@ -520,7 +529,11 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) { case nonEmpty: all = append(all, t) case !t.discoveredLabels.IsEmpty(): - sp.droppedTargets = append(sp.droppedTargets, t) + // A KeepDroppedTargets of 0 means no limit, so every dropped target is kept. + if sp.config.KeepDroppedTargets == 0 || uint(len(sp.droppedTargets)) < sp.config.KeepDroppedTargets { + sp.droppedTargets = append(sp.droppedTargets, t) + } + sp.droppedTargetsCount++ } } } diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 3f119b94dd..8578f1bec6 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -88,6 +88,7 @@ func TestDroppedTargetsList(t *testing.T) { SourceLabels: model.LabelNames{"job"}, }, }, + KeepDroppedTargets: 1, } tgs = []*targetgroup.Group{ { diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 99589ac46f..227027e462 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -100,6 +100,7 @@ type ScrapePoolsRetriever interface { type TargetRetriever interface { TargetsActive() map[string][]*scrape.Target TargetsDropped() map[string][]*scrape.Target + TargetsDroppedCounts() map[string]int } // AlertmanagerRetriever provides a list of all/dropped AlertManager URLs. @@ -898,8 +899,9 @@ type DroppedTarget struct { // TargetDiscovery has all the active targets. 
type TargetDiscovery struct { - ActiveTargets []*Target `json:"activeTargets"` - DroppedTargets []*DroppedTarget `json:"droppedTargets"` + ActiveTargets []*Target `json:"activeTargets"` + DroppedTargets []*DroppedTarget `json:"droppedTargets"` + DroppedTargetCounts map[string]int `json:"droppedTargetCounts"` } // GlobalURLOptions contains fields used for deriving the global URL for local targets. @@ -1039,6 +1041,9 @@ func (api *API) targets(r *http.Request) apiFuncResult { } else { res.ActiveTargets = []*Target{} } + if showDropped { + res.DroppedTargetCounts = api.targetRetriever(r.Context()).TargetsDroppedCounts() + } if showDropped { targetsDropped := api.targetRetriever(r.Context()).TargetsDropped() droppedKeys, numTargets := sortKeys(targetsDropped) diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 99e3b292e8..742ca09ba6 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -137,6 +137,14 @@ func (t testTargetRetriever) TargetsDropped() map[string][]*scrape.Target { return t.droppedTargets } +func (t testTargetRetriever) TargetsDroppedCounts() map[string]int { + r := make(map[string]int) + for k, v := range t.droppedTargets { + r[k] = len(v) + } + return r +} + func (t *testTargetRetriever) SetMetadataStoreForTargets(identifier string, metadata scrape.MetricMetadataStore) error { targets, ok := t.activeTargets[identifier] @@ -1384,6 +1392,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + DroppedTargetCounts: map[string]int{"blackbox": 1}, }, }, { @@ -1436,6 +1445,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + DroppedTargetCounts: map[string]int{"blackbox": 1}, }, }, { @@ -1498,6 +1508,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + DroppedTargetCounts: map[string]int{"blackbox": 1}, }, }, // With a matching metric. 
diff --git a/web/api/v1/errors_test.go b/web/api/v1/errors_test.go index afdd673375..8d194a0581 100644 --- a/web/api/v1/errors_test.go +++ b/web/api/v1/errors_test.go @@ -229,6 +229,11 @@ func (DummyTargetRetriever) TargetsDropped() map[string][]*scrape.Target { return map[string][]*scrape.Target{} } +// TargetsDroppedCounts implements targetRetriever. +func (DummyTargetRetriever) TargetsDroppedCounts() map[string]int { + return nil +} + // DummyAlertmanagerRetriever implements AlertmanagerRetriever. type DummyAlertmanagerRetriever struct{} diff --git a/web/ui/react-app/src/pages/serviceDiscovery/Services.tsx b/web/ui/react-app/src/pages/serviceDiscovery/Services.tsx index 21bf2259b9..79d88fbe4f 100644 --- a/web/ui/react-app/src/pages/serviceDiscovery/Services.tsx +++ b/web/ui/react-app/src/pages/serviceDiscovery/Services.tsx @@ -14,6 +14,7 @@ import SearchBar from '../../components/SearchBar'; interface ServiceMap { activeTargets: Target[]; droppedTargets: DroppedTarget[]; + droppedTargetCounts: Record; } export interface TargetLabels { @@ -34,7 +35,7 @@ const droppedTargetKVSearch = new KVSearch({ export const processSummary = ( activeTargets: Target[], - droppedTargets: DroppedTarget[] + droppedTargetCounts: Record ): Record => { const targets: Record = {}; @@ -50,15 +51,15 @@ export const processSummary = ( targets[name].total++; targets[name].active++; } - for (const target of droppedTargets) { - const { job: name } = target.discoveredLabels; + for (const name in droppedTargetCounts) { if (!targets[name]) { targets[name] = { - total: 0, + total: droppedTargetCounts[name], active: 0, }; + } else { + targets[name].total += droppedTargetCounts[name]; } - targets[name].total++; } return targets; @@ -94,10 +95,10 @@ export const processTargets = (activeTargets: Target[], droppedTargets: DroppedT return labels; }; -export const ServiceDiscoveryContent: FC = ({ activeTargets, droppedTargets }) => { +export const ServiceDiscoveryContent: FC = ({ activeTargets, droppedTargets, 
droppedTargetCounts }) => { const [activeTargetList, setActiveTargetList] = useState(activeTargets); const [droppedTargetList, setDroppedTargetList] = useState(droppedTargets); - const [targetList, setTargetList] = useState(processSummary(activeTargets, droppedTargets)); + const [targetList, setTargetList] = useState(processSummary(activeTargets, droppedTargetCounts)); const [labelList, setLabelList] = useState(processTargets(activeTargets, droppedTargets)); const handleSearchChange = useCallback( @@ -118,9 +119,9 @@ export const ServiceDiscoveryContent: FC = ({ activeTargets, dropped const defaultValue = useMemo(getQuerySearchFilter, []); useEffect(() => { - setTargetList(processSummary(activeTargetList, droppedTargetList)); + setTargetList(processSummary(activeTargetList, droppedTargetCounts)); setLabelList(processTargets(activeTargetList, droppedTargetList)); - }, [activeTargetList, droppedTargetList]); + }, [activeTargetList, droppedTargetList, droppedTargetCounts]); return ( <>