From 0988e3b93710cff1d293dfa33aeb925ebde51be0 Mon Sep 17 00:00:00 2001 From: Seth Miller Date: Wed, 6 Apr 2016 22:47:02 -0500 Subject: [PATCH] Add support for Azure discovery This change adds the ability to do target discovery with Microsoft's Azure platform. --- config/config.go | 34 ++++- config/config_test.go | 20 +++ config/testdata/conf.good.yml | 8 ++ retrieval/discovery/azure.go | 248 ++++++++++++++++++++++++++++++++++ retrieval/targetmanager.go | 3 + 5 files changed, 312 insertions(+), 1 deletion(-) create mode 100644 retrieval/discovery/azure.go diff --git a/config/config.go b/config/config.go index 1f89d2f151..0c9537f223 100644 --- a/config/config.go +++ b/config/config.go @@ -31,7 +31,7 @@ var ( patJobName = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_-]*$`) patFileSDName = regexp.MustCompile(`^[^*]*(\*[^/]*)?\.(json|yml|yaml|JSON|YML|YAML)$`) patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`) - patAuthLine = regexp.MustCompile(`((?:password|bearer_token|secret_key):\s+)(".+"|'.+'|[^\s]+)`) + patAuthLine = regexp.MustCompile(`((?:password|bearer_token|secret_key|client_secret):\s+)(".+"|'.+'|[^\s]+)`) ) // Load parses the YAML input s into a Config. @@ -139,6 +139,12 @@ var ( Port: 80, RefreshInterval: model.Duration(60 * time.Second), } + + // DefaultAzureSDConfig is the default Azure SD configuration. + DefaultAzureSDConfig = AzureSDConfig{ + Port: 80, + RefreshInterval: model.Duration(5 * time.Minute), + } ) // URL is a custom URL type that allows validation at configuration load time. @@ -409,6 +415,8 @@ type ScrapeConfig struct { KubernetesSDConfigs []*KubernetesSDConfig `yaml:"kubernetes_sd_configs,omitempty"` // List of EC2 service discovery configurations. EC2SDConfigs []*EC2SDConfig `yaml:"ec2_sd_configs,omitempty"` + // List of Azure service discovery configurations. + AzureSDConfigs []*AzureSDConfig `yaml:"azure_sd_configs,omitempty"` // List of target relabel configurations. 
RelabelConfigs []*RelabelConfig `yaml:"relabel_configs,omitempty"` @@ -801,6 +809,30 @@ func (c *EC2SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { return checkOverflow(c.XXX, "ec2_sd_config") } +// AzureSDConfig is the configuration for Azure based service discovery. +type AzureSDConfig struct { + Port int `yaml:"port"` + SubscriptionID string `yaml:"subscription_id"` + TenantID string `yaml:"tenant_id,omitempty"` + ClientID string `yaml:"client_id,omitempty"` + ClientSecret string `yaml:"client_secret,omitempty"` + RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` + // Catches all undefined fields and must be empty after parsing. + XXX map[string]interface{} `yaml:",inline"` +} + +// UnmarshalYAML implements the yaml.Unmarshaler interface. +func (c *AzureSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { + *c = DefaultAzureSDConfig + type plain AzureSDConfig + err := unmarshal((*plain)(c)) + if err != nil { + return err + } + + return checkOverflow(c.XXX, "azure_sd_config") +} + // RelabelAction is the action to be performed on relabeling. 
type RelabelAction string diff --git a/config/config_test.go b/config/config_test.go index eebea6e72f..476f143b1b 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -270,6 +270,26 @@ var expectedConf = &Config{ }, }, }, + { + JobName: "service-azure", + + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + + MetricsPath: DefaultScrapeConfig.MetricsPath, + Scheme: DefaultScrapeConfig.Scheme, + + AzureSDConfigs: []*AzureSDConfig{ + { + SubscriptionID: "11AAAA11-A11A-111A-A111-1111A1111A11", + TenantID: "BBBB222B-B2B2-2B22-B222-2BB2222BB2B2", + ClientID: "333333CC-3C33-3333-CCC3-33C3CCCCC33C", + ClientSecret: "nAdvAK2oBuVym4IXix", + RefreshInterval: model.Duration(5 * time.Minute), + Port: 9100, + }, + }, + }, { JobName: "service-nerve", diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index c1d11d6979..f2cc1bd0e2 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -127,6 +127,14 @@ scrape_configs: access_key: access secret_key: secret +- job_name: service-azure + azure_sd_configs: + - subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11 + tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2 + client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C + client_secret: nAdvAK2oBuVym4IXix + port: 9100 + - job_name: service-nerve nerve_sd_configs: - servers: diff --git a/retrieval/discovery/azure.go b/retrieval/discovery/azure.go new file mode 100644 index 0000000000..78131b9d97 --- /dev/null +++ b/retrieval/discovery/azure.go @@ -0,0 +1,248 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package discovery
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/Azure/azure-sdk-for-go/arm/compute"
+	"github.com/Azure/azure-sdk-for-go/arm/network"
+	"github.com/Azure/go-autorest/autorest/azure"
+
+	"github.com/prometheus/common/log"
+	"github.com/prometheus/common/model"
+	"golang.org/x/net/context"
+
+	"github.com/prometheus/prometheus/config"
+	"github.com/prometheus/prometheus/util/strutil"
+)
+
+const (
+	azureLabel                     = model.MetaLabelPrefix + "azure_"
+	azureLabelMachineID            = azureLabel + "machine_id"
+	azureLabelMachineResourceGroup = azureLabel + "machine_resource_group"
+	azureLabelMachineName          = azureLabel + "machine_name"
+	azureLabelMachineLocation      = azureLabel + "machine_location"
+	azureLabelMachinePrivateIP     = azureLabel + "machine_private_ip"
+	azureLabelMachineTag           = azureLabel + "machine_tag_"
+)
+
+// AzureDiscovery is a TargetProvider that discovers targets by issuing
+// Azure-SD requests at a fixed interval.
+type AzureDiscovery struct {
+	cfg      *config.AzureSDConfig
+	interval time.Duration
+	port     int
+}
+
+// NewAzureDiscovery returns an AzureDiscovery for cfg. The refresh interval
+// and the scrape port are copied out of cfg when the value is constructed.
+func NewAzureDiscovery(cfg *config.AzureSDConfig) *AzureDiscovery {
+	ad := &AzureDiscovery{cfg: cfg}
+	ad.interval = time.Duration(cfg.RefreshInterval)
+	ad.port = cfg.Port
+	return ad
+}
+
+// Run implements the TargetProvider interface.
+// It refreshes the targets once immediately and then on every tick of the
+// configured interval, sending each successful result on ch until ctx is
+// canceled. ch is closed when Run returns.
+func (ad *AzureDiscovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
+	defer close(ch)
+	ticker := time.NewTicker(ad.interval)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		default:
+		}
+
+		tg, err := ad.refresh()
+		if err != nil {
+			log.Errorf("unable to refresh during Azure discovery: %s", err)
+		} else {
+			// Give up on the send when the context is canceled so that Run
+			// cannot block forever on a receiver that has stopped reading.
+			select {
+			case ch <- []*config.TargetGroup{tg}:
+			case <-ctx.Done():
+				return
+			}
+		}
+
+		select {
+		case <-ticker.C:
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// azureClient represents multiple Azure Resource Manager providers.
+type azureClient struct {
+	nic network.InterfacesClient
+	vm  compute.VirtualMachinesClient
+}
+
+// createAzureClient builds the virtual machine and network interface clients
+// for the subscription named in cfg. Both clients are authorized with the same
+// service principal token, which is requested for the Azure public cloud using
+// the tenant ID, client ID and client secret from cfg.
+func createAzureClient(cfg config.AzureSDConfig) (azureClient, error) {
+	var c azureClient
+	oauthConfig, err := azure.PublicCloud.OAuthConfigForTenant(cfg.TenantID)
+	if err != nil {
+		return azureClient{}, err
+	}
+	spt, err := azure.NewServicePrincipalToken(*oauthConfig, cfg.ClientID, cfg.ClientSecret, azure.PublicCloud.ResourceManagerEndpoint)
+	if err != nil {
+		return azureClient{}, err
+	}
+
+	c.vm = compute.NewVirtualMachinesClient(cfg.SubscriptionID)
+	c.vm.Authorizer = spt
+
+	c.nic = network.NewInterfacesClient(cfg.SubscriptionID)
+	c.nic.Authorizer = spt
+
+	return c, nil
+}
+
+// azureResource represents a resource identifier in Azure.
+type azureResource struct {
+	Name          string
+	ResourceGroup string
+}
+
+// newAzureResourceFromID extracts the resource group and the resource name
+// from an Azure resource ID, both lower-cased. An ID that does not split into
+// exactly nine "/"-separated parts is logged and rejected with an error.
+func newAzureResourceFromID(id string) (azureResource, error) {
+	// Resource IDs have the following format.
+	// /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME
+	s := strings.Split(id, "/")
+	if len(s) != 9 {
+		err := fmt.Errorf("invalid ID '%s'. Refusing to create azureResource", id)
+		log.Error(err)
+		return azureResource{}, err
+	}
+	return azureResource{
+		Name:          strings.ToLower(s[8]),
+		ResourceGroup: strings.ToLower(s[4]),
+	}, nil
+}
+
+// refresh lists every virtual machine of the subscription, following result
+// pages until none are left, and turns the machines into one target group.
+// Machines are processed concurrently because each one needs at least one
+// extra network interface lookup. If any machine fails, the whole refresh
+// fails so that callers never see a partial target list.
+func (ad *AzureDiscovery) refresh() (*config.TargetGroup, error) {
+	tg := &config.TargetGroup{}
+	client, err := createAzureClient(*ad.cfg)
+	if err != nil {
+		return tg, fmt.Errorf("could not create Azure client: %s", err)
+	}
+
+	var machines []compute.VirtualMachine
+	result, err := client.vm.ListAll()
+	if err != nil {
+		return tg, fmt.Errorf("could not list virtual machines: %s", err)
+	}
+	if result.Value != nil {
+		machines = append(machines, *result.Value...)
+	}
+
+	// If we still have results, keep going until we have no more.
+	for result.NextLink != nil {
+		result, err = client.vm.ListAllNextResults(result)
+		if err != nil {
+			return tg, fmt.Errorf("could not list virtual machines: %s", err)
+		}
+		if result.Value != nil {
+			machines = append(machines, *result.Value...)
+		}
+	}
+	log.Debugf("Found %d virtual machines during Azure discovery.", len(machines))
+
+	// We have the slice of machines. Now turn them into targets.
+	// Doing them in go routines because the network interface calls are slow.
+	type target struct {
+		labelSet model.LabelSet
+		err      error
+	}
+
+	// The channel is buffered for every machine, so goroutines never block on
+	// the send even when this function returns early on the first error.
+	ch := make(chan target, len(machines))
+	for _, vm := range machines {
+		go func(vm compute.VirtualMachine) {
+			// Exactly one send per machine: the loop below receives once per
+			// machine and would otherwise wait forever.
+			labels, err := ad.vmLabels(&client, vm)
+			ch <- target{labelSet: labels, err: err}
+		}(vm)
+	}
+
+	for range machines {
+		tgt := <-ch
+		if tgt.err != nil {
+			return nil, fmt.Errorf("unable to complete Azure service discovery: %s", tgt.err)
+		}
+		if tgt.labelSet != nil {
+			tg.Targets = append(tg.Targets, tgt.labelSet)
+		}
+	}
+
+	log.Debugf("Azure discovery completed.")
+	return tg, nil
+}
+
+// vmLabels builds the label set of a single virtual machine, including the
+// scrape address made of the first private IP of its primary network
+// interface and the configured port. It returns a nil label set together with
+// a nil error when the machine has to be skipped (it is deallocated or has no
+// primary network interface), and an error when a resource ID is malformed,
+// a network interface cannot be fetched, or the primary interface has no
+// private IP.
+func (ad *AzureDiscovery) vmLabels(client *azureClient, vm compute.VirtualMachine) (model.LabelSet, error) {
+	r, err := newAzureResourceFromID(*vm.ID)
+	if err != nil {
+		return nil, err
+	}
+
+	labels := model.LabelSet{
+		azureLabelMachineID:            model.LabelValue(*vm.ID),
+		azureLabelMachineName:          model.LabelValue(*vm.Name),
+		azureLabelMachineLocation:      model.LabelValue(*vm.Location),
+		azureLabelMachineResourceGroup: model.LabelValue(r.ResourceGroup),
+	}
+
+	if vm.Tags != nil {
+		for k, v := range *vm.Tags {
+			name := strutil.SanitizeLabelName(k)
+			labels[azureLabelMachineTag+model.LabelName(name)] = model.LabelValue(*v)
+		}
+	}
+
+	// Get the IP address information via separate call to the network provider.
+	for _, nic := range *vm.Properties.NetworkProfile.NetworkInterfaces {
+		r, err := newAzureResourceFromID(*nic.ID)
+		if err != nil {
+			return nil, err
+		}
+		networkInterface, err := client.nic.Get(r.ResourceGroup, r.Name, "")
+		if err != nil {
+			log.Errorf("Unable to get network interface %s: %s", r.Name, err)
+			// We cannot continue without a network interface.
+			return nil, err
+		}
+
+		// Unfortunately Azure does not return information on whether a VM is deallocated.
+		// This information is available via another API call however the Go SDK does not
+		// yet support this. On deallocated machines, this value happens to be nil so it
+		// is a cheap and easy way to determine if a machine is allocated or not.
+		if networkInterface.Properties.Primary == nil {
+			log.Debugf("Virtual machine %s is deallocated. Skipping during Azure SD.", *vm.Name)
+			return nil, nil
+		}
+
+		if !*networkInterface.Properties.Primary {
+			continue
+		}
+
+		if ipConfigs := networkInterface.Properties.IPConfigurations; ipConfigs != nil {
+			for _, ip := range *ipConfigs {
+				if ip.Properties.PrivateIPAddress != nil {
+					labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.Properties.PrivateIPAddress)
+					address := fmt.Sprintf("%s:%d", *ip.Properties.PrivateIPAddress, ad.port)
+					labels[model.AddressLabel] = model.LabelValue(address)
+					return labels, nil
+				}
+			}
+		}
+		// If we made it here, none of the IP configurations of the primary
+		// interface has a private IP, which should be impossible.
+		// Return an error to ensure an all or nothing situation.
+		return nil, fmt.Errorf("unable to find a private IP for VM %s", *vm.Name)
+	}
+
+	// No primary network interface was found, so there is no address to
+	// scrape. Skip the machine.
+	return nil, nil
+}
diff --git a/retrieval/targetmanager.go b/retrieval/targetmanager.go
index 845d5950f8..77443e31ee 100644
--- a/retrieval/targetmanager.go
+++ b/retrieval/targetmanager.go
@@ -393,6 +393,9 @@ func providersFromConfig(cfg *config.ScrapeConfig) map[string]TargetProvider {
 	for i, c := range cfg.EC2SDConfigs {
 		app("ec2", i, discovery.NewEC2Discovery(c))
 	}
+	for i, c := range cfg.AzureSDConfigs {
+		app("azure", i, discovery.NewAzureDiscovery(c))
+	}
 	if len(cfg.TargetGroups) > 0 {
 		app("static", 0, NewStaticProvider(cfg.TargetGroups))
 	}