diff --git a/cmd/k8s-operator/e2e/ingress_test.go b/cmd/k8s-operator/e2e/ingress_test.go
index 5339b0583..47a838414 100644
--- a/cmd/k8s-operator/e2e/ingress_test.go
+++ b/cmd/k8s-operator/e2e/ingress_test.go
@@ -5,6 +5,7 @@ package e2e
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"net/http"
 	"testing"
@@ -14,7 +15,11 @@ import (
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/kubernetes"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/yaml"
 
+	"tailscale.com/cmd/testwrapper/flakytest"
 	kube "tailscale.com/k8s-operator"
 	"tailscale.com/tstest"
 	"tailscale.com/types/ptr"
@@ -23,6 +28,7 @@
 // See [TestMain] for test requirements.
 func TestIngress(t *testing.T) {
+	flakytest.Mark(t, "https://github.com/tailscale/corp/issues/37533")
 	if tnClient == nil {
 		t.Skip("TestIngress requires a working tailnet client")
 	}
 
@@ -85,8 +91,68 @@
 	}
 	createAndCleanup(t, kubeClient, svc)
 
+	// TODO(tomhjp): Delete once we've reproduced the flake with this extra info.
+	t0 := time.Now()
+	watcherCtx, cancelWatcher := context.WithCancel(t.Context())
+	defer cancelWatcher()
+	go func() {
+		// client-go client for logs.
+		clientGoKubeClient, err := kubernetes.NewForConfig(restCfg)
+		if err != nil {
+			t.Logf("error creating client-go Kubernetes client: %v", err)
+			return
+		}
+
+		for {
+			select {
+			case <-watcherCtx.Done():
+				t.Logf("stopping watcher after %v", time.Since(t0))
+				return
+			case <-time.After(time.Minute):
+				t.Logf("dumping info after %v elapsed", time.Since(t0))
+				// Service itself.
+				svc := &corev1.Service{ObjectMeta: objectMeta("default", "test-ingress")}
+				err := get(watcherCtx, kubeClient, svc)
+				svcYaml, _ := yaml.Marshal(svc)
+				t.Logf("Service: %s, error: %v\n%s", svc.Name, err, string(svcYaml))
+
+				// Pods in tailscale namespace.
+				var pods corev1.PodList
+				if err := kubeClient.List(watcherCtx, &pods, client.InNamespace("tailscale")); err != nil {
+					t.Logf("error listing Pods in tailscale namespace: %v", err)
+				} else {
+					t.Logf("%d Pods", len(pods.Items))
+					for _, pod := range pods.Items {
+						podYaml, _ := yaml.Marshal(pod)
+						t.Logf("Pod: %s\n%s", pod.Name, string(podYaml))
+						logs := clientGoKubeClient.CoreV1().Pods("tailscale").GetLogs(pod.Name, &corev1.PodLogOptions{}).Do(watcherCtx)
+						logData, err := logs.Raw()
+						if err != nil {
+							t.Logf("error reading logs for Pod %s: %v", pod.Name, err)
+							continue
+						}
+						t.Logf("Logs for Pod %s:\n%s", pod.Name, string(logData))
+					}
+				}
+
+				// Tailscale status on the tailnet.
+				lc, err := tnClient.LocalClient()
+				if err != nil {
+					t.Logf("error getting tailnet local client: %v", err)
+				} else {
+					status, err := lc.Status(watcherCtx)
+					statusJSON, _ := json.MarshalIndent(status, "", "  ")
+					t.Logf("Tailnet status: %s, error: %v", string(statusJSON), err)
+				}
+			}
+		}
+	}()
+
 	// TODO: instead of timing out only when test times out, cancel context after 60s or so.
 	if err := wait.PollUntilContextCancel(t.Context(), time.Millisecond*100, true, func(ctx context.Context) (done bool, err error) {
+		if time.Since(t0) > time.Minute {
+			t.Logf("%v elapsed waiting for Service default/test-ingress to become Ready", time.Since(t0))
+		}
 		maybeReadySvc := &corev1.Service{ObjectMeta: objectMeta("default", "test-ingress")}
 		if err := get(ctx, kubeClient, maybeReadySvc); err != nil {
 			return false, err
@@ -99,6 +165,7 @@
 	}); err != nil {
 		t.Fatalf("error waiting for the Service to become Ready: %v", err)
 	}
+	cancelWatcher()
 
 	var resp *http.Response
 	if err := tstest.WaitFor(time.Minute, func() error {
diff --git a/cmd/k8s-operator/e2e/setup.go b/cmd/k8s-operator/e2e/setup.go
index 845a59145..c4fd45d3e 100644
--- a/cmd/k8s-operator/e2e/setup.go
+++ b/cmd/k8s-operator/e2e/setup.go
@@ -70,6 +70,7 @@ const (
 var (
 	tsClient   *tailscale.Client // For API calls to control.
 	tnClient   *tsnet.Server     // For testing real tailnet traffic.
+	restCfg    *rest.Config      // For constructing a client-go client if necessary.
 	kubeClient client.WithWatch  // For k8s API calls.
 
 	//go:embed certs/pebble.minica.crt
@@ -141,7 +142,7 @@ func runTests(m *testing.M) (int, error) {
 	}
 
 	// Cluster client setup.
-	restCfg, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
+	restCfg, err = clientcmd.BuildConfigFromFlags("", kubeconfig)
 	if err != nil {
 		return 0, fmt.Errorf("error loading kubeconfig: %w", err)
 	}