diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 746496a699..7c2ece2c7b 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -213,6 +213,14 @@ func NewDiscovery(cfg *SDConfig, logger log.Logger, metrics discovery.Discoverer return d, nil } +type client interface { + getVMs(ctx context.Context, resourceGroup string) ([]virtualMachine, error) + getScaleSets(ctx context.Context, resourceGroup string) ([]armcompute.VirtualMachineScaleSet, error) + getScaleSetVMs(ctx context.Context, scaleSet armcompute.VirtualMachineScaleSet) ([]virtualMachine, error) + getVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) + getVMScaleSetVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID, scaleSetName, instanceID string) (*armnetwork.Interface, error) +} + // azureClient represents multiple Azure Resource Manager providers. type azureClient struct { nic *armnetwork.InterfacesClient @@ -222,14 +230,17 @@ type azureClient struct { logger log.Logger } +var _ client = &azureClient{} + // createAzureClient is a helper function for creating an Azure compute client to ARM. -func createAzureClient(cfg SDConfig) (azureClient, error) { +func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) { cloudConfiguration, err := CloudConfigurationFromName(cfg.Environment) if err != nil { - return azureClient{}, err + return &azureClient{}, err } var c azureClient + c.logger = logger telemetry := policy.TelemetryOptions{ ApplicationID: userAgent, @@ -240,12 +251,12 @@ func createAzureClient(cfg SDConfig) (azureClient, error) { Telemetry: telemetry, }) if err != nil { - return azureClient{}, err + return &azureClient{}, err } client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "azure_sd") if err != nil { - return azureClient{}, err + return &azureClient{}, err } options := &arm.ClientOptions{ ClientOptions: policy.ClientOptions{ @@ -257,25 +268,25 @@ func createAzureClient(cfg SDConfig) (azureClient, error) { c.vm, err = armcompute.NewVirtualMachinesClient(cfg.SubscriptionID, credential, options) if err != nil { - return azureClient{}, err + return &azureClient{}, err } c.nic, err = armnetwork.NewInterfacesClient(cfg.SubscriptionID, credential, options) if err != nil { - return azureClient{}, err + return &azureClient{}, err } c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(cfg.SubscriptionID, credential, options) if err != nil { - return azureClient{}, err + return &azureClient{}, err } c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(cfg.SubscriptionID, credential, options) if err != nil { - return azureClient{}, err + return &azureClient{}, err } - return c, nil + return &c, nil } func newCredential(cfg SDConfig, policyClientOptions policy.ClientOptions) (azcore.TokenCredential, error) { @@ -341,12 +352,11 @@ func newAzureResourceFromID(id string, logger log.Logger) (*arm.ResourceID, erro func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { defer level.Debug(d.logger).Log("msg", "Azure discovery completed") - client, err := createAzureClient(*d.cfg) + client, err := createAzureClient(*d.cfg, d.logger) if err != nil { d.metrics.failuresCount.Inc() return nil, fmt.Errorf("could not create Azure client: %w", err) } - client.logger = d.logger machines, err := client.getVMs(ctx, d.cfg.ResourceGroup) if err != nil { @@ -385,96 +395,8 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { for _, vm := range machines { go func(vm virtualMachine) { defer wg.Done() - r, err := newAzureResourceFromID(vm.ID, d.logger) - if err != nil { - ch <- target{labelSet: nil, err: err} - return - } - - labels := model.LabelSet{ - azureLabelSubscriptionID: model.LabelValue(d.cfg.SubscriptionID), - azureLabelTenantID: model.LabelValue(d.cfg.TenantID), - azureLabelMachineID: model.LabelValue(vm.ID), - azureLabelMachineName: model.LabelValue(vm.Name), - azureLabelMachineComputerName: model.LabelValue(vm.ComputerName), - azureLabelMachineOSType: model.LabelValue(vm.OsType), - azureLabelMachineLocation: model.LabelValue(vm.Location), - azureLabelMachineResourceGroup: model.LabelValue(r.ResourceGroupName), - azureLabelMachineSize: model.LabelValue(vm.Size), - } - - if vm.ScaleSet != "" { - labels[azureLabelMachineScaleSet] = model.LabelValue(vm.ScaleSet) - } - - for k, v := range vm.Tags { - name := strutil.SanitizeLabelName(k) - labels[azureLabelMachineTag+model.LabelName(name)] = model.LabelValue(*v) - } - - // Get the IP address information via separate call to the network provider. - for _, nicID := range vm.NetworkInterfaces { - var networkInterface *armnetwork.Interface - if v, ok := d.getFromCache(nicID); ok { - networkInterface = v - d.metrics.cacheHitCount.Add(1) - } else { - if vm.ScaleSet == "" { - networkInterface, err = client.getVMNetworkInterfaceByID(ctx, nicID) - } else { - networkInterface, err = client.getVMScaleSetVMNetworkInterfaceByID(ctx, nicID, vm.ScaleSet, vm.InstanceID) - } - - if err != nil { - if errors.Is(err, errorNotFound) { - level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) - } else { - ch <- target{labelSet: nil, err: err} - } - - // Get out of this routine because we cannot continue without a network interface. - return - } - - // Continue processing with the network interface - d.addToCache(nicID, networkInterface) - } - - if networkInterface.Properties == nil { - continue - } - - // Unfortunately Azure does not return information on whether a VM is deallocated. - // This information is available via another API call however the Go SDK does not - // yet support this. On deallocated machines, this value happens to be nil so it - // is a cheap and easy way to determine if a machine is allocated or not. - if networkInterface.Properties.Primary == nil { - level.Debug(d.logger).Log("msg", "Skipping deallocated virtual machine", "machine", vm.Name) - return - } - - if *networkInterface.Properties.Primary { - for _, ip := range networkInterface.Properties.IPConfigurations { - // IPAddress is a field defined in PublicIPAddressPropertiesFormat, - // therefore we need to validate that both are not nil. - if ip.Properties != nil && ip.Properties.PublicIPAddress != nil && ip.Properties.PublicIPAddress.Properties != nil && ip.Properties.PublicIPAddress.Properties.IPAddress != nil { - labels[azureLabelMachinePublicIP] = model.LabelValue(*ip.Properties.PublicIPAddress.Properties.IPAddress) - } - if ip.Properties != nil && ip.Properties.PrivateIPAddress != nil { - labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.Properties.PrivateIPAddress) - address := net.JoinHostPort(*ip.Properties.PrivateIPAddress, fmt.Sprintf("%d", d.port)) - labels[model.AddressLabel] = model.LabelValue(address) - ch <- target{labelSet: labels, err: nil} - return - } - // If we made it here, we don't have a private IP which should be impossible. - // Return an empty target and error to ensure an all or nothing situation. - err = fmt.Errorf("unable to find a private IP for VM %s", vm.Name) - ch <- target{labelSet: nil, err: err} - return - } - } - } + labelSet, err := d.vmToLabelSet(ctx, client, vm) + ch <- target{labelSet: labelSet, err: err} }(vm) } @@ -495,6 +417,95 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { return []*targetgroup.Group{&tg}, nil } +func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualMachine) (model.LabelSet, error) { + r, err := newAzureResourceFromID(vm.ID, d.logger) + if err != nil { + return nil, err + } + + labels := model.LabelSet{ + azureLabelSubscriptionID: model.LabelValue(d.cfg.SubscriptionID), + azureLabelTenantID: model.LabelValue(d.cfg.TenantID), + azureLabelMachineID: model.LabelValue(vm.ID), + azureLabelMachineName: model.LabelValue(vm.Name), + azureLabelMachineComputerName: model.LabelValue(vm.ComputerName), + azureLabelMachineOSType: model.LabelValue(vm.OsType), + azureLabelMachineLocation: model.LabelValue(vm.Location), + azureLabelMachineResourceGroup: model.LabelValue(r.ResourceGroupName), + azureLabelMachineSize: model.LabelValue(vm.Size), + } + + if vm.ScaleSet != "" { + labels[azureLabelMachineScaleSet] = model.LabelValue(vm.ScaleSet) + } + + for k, v := range vm.Tags { + name := strutil.SanitizeLabelName(k) + labels[azureLabelMachineTag+model.LabelName(name)] = model.LabelValue(*v) + } + + // Get the IP address information via separate call to the network provider. + for _, nicID := range vm.NetworkInterfaces { + var networkInterface *armnetwork.Interface + if v, ok := d.getFromCache(nicID); ok { + networkInterface = v + d.metrics.cacheHitCount.Add(1) + } else { + if vm.ScaleSet == "" { + networkInterface, err = client.getVMNetworkInterfaceByID(ctx, nicID) + } else { + networkInterface, err = client.getVMScaleSetVMNetworkInterfaceByID(ctx, nicID, vm.ScaleSet, vm.InstanceID) + } + if err != nil { + if errors.Is(err, errorNotFound) { + level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) + } else { + return nil, err + } + // Get out of this routine because we cannot continue without a network interface. + return nil, nil + } + + // Continue processing with the network interface + d.addToCache(nicID, networkInterface) + } + + if networkInterface.Properties == nil { + continue + } + + // Unfortunately Azure does not return information on whether a VM is deallocated. + // This information is available via another API call however the Go SDK does not + // yet support this. On deallocated machines, this value happens to be nil so it + // is a cheap and easy way to determine if a machine is allocated or not. + if networkInterface.Properties.Primary == nil { + level.Debug(d.logger).Log("msg", "Skipping deallocated virtual machine", "machine", vm.Name) + return nil, nil + } + + if *networkInterface.Properties.Primary { + for _, ip := range networkInterface.Properties.IPConfigurations { + // IPAddress is a field defined in PublicIPAddressPropertiesFormat, + // therefore we need to validate that both are not nil. + if ip.Properties != nil && ip.Properties.PublicIPAddress != nil && ip.Properties.PublicIPAddress.Properties != nil && ip.Properties.PublicIPAddress.Properties.IPAddress != nil { + labels[azureLabelMachinePublicIP] = model.LabelValue(*ip.Properties.PublicIPAddress.Properties.IPAddress) + } + if ip.Properties != nil && ip.Properties.PrivateIPAddress != nil { + labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.Properties.PrivateIPAddress) + address := net.JoinHostPort(*ip.Properties.PrivateIPAddress, fmt.Sprintf("%d", d.port)) + labels[model.AddressLabel] = model.LabelValue(address) + return labels, nil + } + // If we made it here, we don't have a private IP which should be impossible. + // Return an empty target and error to ensure an all or nothing situation. + return nil, fmt.Errorf("unable to find a private IP for VM %s", vm.Name) + } + } + } + // TODO: Should we say something at this point? + return nil, nil +} + func (client *azureClient) getVMs(ctx context.Context, resourceGroup string) ([]virtualMachine, error) { var vms []virtualMachine if len(resourceGroup) == 0 { diff --git a/discovery/azure/azure_test.go b/discovery/azure/azure_test.go index 1e437c75f2..32dab66c8c 100644 --- a/discovery/azure/azure_test.go +++ b/discovery/azure/azure_test.go @@ -14,16 +14,24 @@ package azure import ( + "context" + "fmt" "testing" "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4" + cache "github.com/Code-Hex/go-generics-cache" + "github.com/Code-Hex/go-generics-cache/policy/lru" + "github.com/go-kit/log" "github.com/stretchr/testify/require" "go.uber.org/goleak" ) func TestMain(m *testing.M) { - goleak.VerifyTestMain(m) + goleak.VerifyTestMain(m, + goleak.IgnoreTopFunction("github.com/Code-Hex/go-generics-cache.(*janitor).run.func1"), + ) } func TestMapFromVMWithEmptyTags(t *testing.T) { @@ -79,6 +87,91 @@ func TestMapFromVMWithEmptyTags(t *testing.T) { require.Equal(t, expectedVM, actualVM) } +func TestVMToLabelSet(t *testing.T) { + id := "/subscriptions/00000000-0000-0000-0000-000000000000/test" + name := "name" + size := "size" + vmSize := armcompute.VirtualMachineSizeTypes(size) + osType := armcompute.OperatingSystemTypesLinux + vmType := "type" + location := "westeurope" + computerName := "computer_name" + networkID := "/subscriptions/00000000-0000-0000-0000-000000000000/network1" + ipAddress := "10.20.30.40" + primary := true + networkProfile := armcompute.NetworkProfile{ + NetworkInterfaces: []*armcompute.NetworkInterfaceReference{ + { + ID: &networkID, + Properties: &armcompute.NetworkInterfaceReferenceProperties{Primary: &primary}, + }, + }, + } + properties := &armcompute.VirtualMachineProperties{ + OSProfile: &armcompute.OSProfile{ + ComputerName: &computerName, + }, + StorageProfile: &armcompute.StorageProfile{ + OSDisk: &armcompute.OSDisk{ + OSType: &osType, + }, + }, + NetworkProfile: &networkProfile, + HardwareProfile: &armcompute.HardwareProfile{ + VMSize: &vmSize, + }, + } + + testVM := armcompute.VirtualMachine{ + ID: &id, + Name: &name, + Type: &vmType, + Location: &location, + Tags: nil, + Properties: properties, + } + + expectedVM := virtualMachine{ + ID: id, + Name: name, + ComputerName: computerName, + Type: vmType, + Location: location, + OsType: "Linux", + Tags: map[string]*string{}, + NetworkInterfaces: []string{networkID}, + Size: size, + } + + actualVM := mapFromVM(testVM) + + require.Equal(t, expectedVM, actualVM) + + cfg := DefaultSDConfig + d := &Discovery{ + cfg: &cfg, + logger: log.NewNopLogger(), + cache: cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5))), + } + network := armnetwork.Interface{ + Name: &networkID, + Properties: &armnetwork.InterfacePropertiesFormat{ + Primary: &primary, + IPConfigurations: []*armnetwork.InterfaceIPConfiguration{ + {Properties: &armnetwork.InterfaceIPConfigurationPropertiesFormat{ + PrivateIPAddress: &ipAddress, + }}, + }, + }, + } + client := &mockAzureClient{ + networkInterface: &network, + } + labelSet, err := d.vmToLabelSet(context.Background(), client, actualVM) + require.NoError(t, err) + require.Len(t, labelSet, 11) +} + func TestMapFromVMWithEmptyOSType(t *testing.T) { id := "test" name := "name" @@ -381,3 +474,35 @@ func TestNewAzureResourceFromID(t *testing.T) { require.Equal(t, tc.expected.ResourceGroupName, actual.ResourceGroupName) } } + +type mockAzureClient struct { + networkInterface *armnetwork.Interface +} + +var _ client = &mockAzureClient{} + +func (*mockAzureClient) getVMs(ctx context.Context, resourceGroup string) ([]virtualMachine, error) { + return nil, nil +} + +func (*mockAzureClient) getScaleSets(ctx context.Context, resourceGroup string) ([]armcompute.VirtualMachineScaleSet, error) { + return nil, nil +} + +func (*mockAzureClient) getScaleSetVMs(ctx context.Context, scaleSet armcompute.VirtualMachineScaleSet) ([]virtualMachine, error) { + return nil, nil +} + +func (m *mockAzureClient) getVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) { + if networkInterfaceID == "" { + return nil, fmt.Errorf("parameter networkInterfaceID cannot be empty") + } + return m.networkInterface, nil +} + +func (m *mockAzureClient) getVMScaleSetVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID, scaleSetName, instanceID string) (*armnetwork.Interface, error) { + if scaleSetName == "" { + return nil, fmt.Errorf("parameter virtualMachineScaleSetName cannot be empty") + } + return m.networkInterface, nil +}