diff --git a/go.mod b/go.mod index 88812912e..9bd6b3f0b 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/Azure/azure-sdk-for-go v68.0.0+incompatible github.com/Azure/go-autorest/autorest v0.11.30 github.com/Azure/go-autorest/autorest/azure/auth v0.5.13 + github.com/DataDog/dd-trace-go/v2 v2.2.2 github.com/apparentlymart/go-cidr v1.1.0 github.com/aws/aws-sdk-go-v2 v1.38.0 github.com/aws/aws-sdk-go-v2/config v1.31.0 @@ -40,7 +41,6 @@ require ( google.golang.org/api v0.247.0 google.golang.org/grpc v1.74.2 google.golang.org/protobuf v1.36.7 - gopkg.in/DataDog/dd-trace-go.v1 v1.74.5 k8s.io/api v0.33.4 k8s.io/apimachinery v0.33.4 k8s.io/client-go v0.33.4 @@ -69,7 +69,6 @@ require ( github.com/DataDog/datadog-agent/pkg/util/scrubber v0.67.0 // indirect github.com/DataDog/datadog-agent/pkg/version v0.67.0 // indirect github.com/DataDog/datadog-go/v5 v5.6.0 // indirect - github.com/DataDog/dd-trace-go/v2 v2.2.2 // indirect github.com/DataDog/go-libddwaf/v4 v4.3.2 // indirect github.com/DataDog/go-runtime-metrics-internal v0.0.4-0.20250721125240-fdf1ef85b633 // indirect github.com/DataDog/go-sqllexer v0.1.6 // indirect diff --git a/go.sum b/go.sum index 2a24f5395..d180013b2 100644 --- a/go.sum +++ b/go.sum @@ -197,8 +197,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= -github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/ZoQgRgVIWFJljSWa/zetS2WTvg= +github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -532,8 +532,6 @@ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzi google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= -gopkg.in/DataDog/dd-trace-go.v1 v1.74.5 h1:1bieplkmiiHV6RwefRAgkcSMQrGyjfrucvoKhBda0Og= -gopkg.in/DataDog/dd-trace-go.v1 v1.74.5/go.mod h1:h35rC0PZzJBCbnql3z/IkQhVEvj4j9wF4/DvuS/7Dzc= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/plugin/trace/setup.go b/plugin/trace/setup.go index 8672dcc53..5721368fb 100644 --- a/plugin/trace/setup.go +++ b/plugin/trace/setup.go @@ -25,6 +25,7 @@ func setup(c *caddy.Controller) error { }) c.OnStartup(t.OnStartup) + c.OnShutdown(t.OnShutdown) return nil } diff --git a/plugin/trace/trace.go b/plugin/trace/trace.go index f7409679d..f3d0155b6 100644 --- a/plugin/trace/trace.go +++ b/plugin/trace/trace.go @@ -19,6 +19,8 @@ import ( _ "github.com/coredns/coredns/plugin/pkg/trace" // Plugin the trace package. "github.com/coredns/coredns/request" + "github.com/DataDog/dd-trace-go/v2/ddtrace/ext" + "github.com/DataDog/dd-trace-go/v2/ddtrace/tracer" "github.com/miekg/dns" ot "github.com/opentracing/opentracing-go" otext "github.com/opentracing/opentracing-go/ext" @@ -26,10 +28,6 @@ import ( zipkinot "github.com/openzipkin-contrib/zipkin-go-opentracing" "github.com/openzipkin/zipkin-go" zipkinhttp "github.com/openzipkin/zipkin-go/reporter/http" - "gopkg.in/DataDog/dd-trace-go.v1/ddtrace" - "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext" - "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/opentracer" - "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer" ) const ( @@ -70,7 +68,7 @@ type trace struct { Next plugin.Handler Endpoint string EndpointType string - tracer ot.Tracer + zipkinTracer ot.Tracer serviceEndpoint string serviceName string clientServer bool @@ -84,7 +82,7 @@ type trace struct { } func (t *trace) Tracer() ot.Tracer { - return t.tracer + return t.zipkinTracer } // OnStartup sets up the tracer @@ -95,15 +93,14 @@ func (t *trace) OnStartup() error { case "zipkin": err = t.setupZipkin() case "datadog": - tracer := opentracer.New( + tracer.Start( tracer.WithAgentAddr(t.Endpoint), tracer.WithDebugMode(clog.D.Value()), tracer.WithGlobalTag(ext.SpanTypeDNS, true), - tracer.WithServiceName(t.serviceName), + tracer.WithService(t.serviceName), tracer.WithAnalyticsRate(t.datadogAnalyticsRate), tracer.WithLogger(&loggerAdapter{log}), ) - t.tracer = tracer t.tagSet = tagByProvider["datadog"] default: err = fmt.Errorf("unknown endpoint type: %s", t.EndpointType) @@ -112,6 +109,14 @@ func (t *trace) OnStartup() error { return err } +// OnShutdown cleans up the tracer +func (t *trace) OnShutdown() error { + if t.EndpointType == "datadog" { + tracer.Stop() + } + return nil +} + func (t *trace) setupZipkin() error { var opts []zipkinhttp.ReporterOption opts = append(opts, zipkinhttp.Logger(stdlog.New(&loggerAdapter{log}, "", 0))) @@ -137,7 +142,7 @@ func (t *trace) setupZipkin() error { if err != nil { return err } - t.tracer = zipkinot.Wrap(tracer) + t.zipkinTracer = zipkinot.Wrap(tracer) t.tagSet = tagByProvider["default"] return err @@ -148,16 +153,42 @@ func (t *trace) Name() string { return "trace" } // ServeDNS implements the plugin.Handle interface. func (t *trace) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { - trace := false + shouldTrace := false if t.every > 0 { queryNr := atomic.AddUint64(&t.count, 1) - if queryNr%t.every == 0 { - trace = true + shouldTrace = true } } + + if t.EndpointType == "datadog" { + return t.serveDNSDatadog(ctx, w, r, shouldTrace) + } + return t.serveDNSZipkin(ctx, w, r, shouldTrace) +} + +func (t *trace) serveDNSDatadog(ctx context.Context, w dns.ResponseWriter, r *dns.Msg, shouldTrace bool) (int, error) { + if !shouldTrace { + return plugin.NextOrFailure(t.Name(), t.Next, ctx, w, r) + } + + span, spanCtx := tracer.StartSpanFromContext(ctx, defaultTopLevelSpanName) + defer span.Finish() + + metadata.SetValueFunc(ctx, metaTraceIdKey, func() string { return span.Context().TraceID() }) + + req := request.Request{W: w, Req: r} + rw := dnstest.NewRecorder(w) + status, err := plugin.NextOrFailure(t.Name(), t.Next, spanCtx, rw, r) + + t.setDatadogSpanTags(span, req, rw, status, err) + + return status, err +} + +func (t *trace) serveDNSZipkin(ctx context.Context, w dns.ResponseWriter, r *dns.Msg, shouldTrace bool) (int, error) { span := ot.SpanFromContext(ctx) - if !trace || span != nil { + if !shouldTrace || span != nil { return plugin.NextOrFailure(t.Name(), t.Next, ctx, w, r) } @@ -172,17 +203,39 @@ func (t *trace) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) span = t.Tracer().StartSpan(defaultTopLevelSpanName, otext.RPCServerOption(spanCtx)) defer span.Finish() - switch spanCtx := span.Context().(type) { - case zipkinot.SpanContext: + if spanCtx, ok := span.Context().(zipkinot.SpanContext); ok { metadata.SetValueFunc(ctx, metaTraceIdKey, func() string { return spanCtx.TraceID.String() }) - case ddtrace.SpanContext: - metadata.SetValueFunc(ctx, metaTraceIdKey, func() string { return fmt.Sprint(spanCtx.TraceID()) }) } rw := dnstest.NewRecorder(w) ctx = ot.ContextWithSpan(ctx, span) status, err := plugin.NextOrFailure(t.Name(), t.Next, ctx, rw, r) + t.setZipkinSpanTags(span, req, rw, status, err) + + return status, err +} + +// setDatadogSpanTags sets span tags using DataDog v2 API +func (t *trace) setDatadogSpanTags(span *tracer.Span, req request.Request, rw *dnstest.Recorder, status int, err error) { + span.SetTag(t.tagSet.Name, req.Name()) + span.SetTag(t.tagSet.Type, req.Type()) + span.SetTag(t.tagSet.Proto, req.Proto()) + span.SetTag(t.tagSet.Remote, req.IP()) + rc := rw.Rcode + if !plugin.ClientWrite(status) { + rc = status + } + span.SetTag(t.tagSet.Rcode, rcode.ToString(rc)) + if err != nil { + span.SetTag("error.message", err.Error()) + span.SetTag("error", true) + span.SetTag("error.type", "dns_error") + } +} + +// setZipkinSpanTags sets span tags for Zipkin/OpenTracing spans +func (t *trace) setZipkinSpanTags(span ot.Span, req request.Request, rw *dnstest.Recorder, status int, err error) { span.SetTag(t.tagSet.Name, req.Name()) span.SetTag(t.tagSet.Type, req.Type()) span.SetTag(t.tagSet.Proto, req.Proto()) @@ -196,9 +249,8 @@ func (t *trace) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) } span.SetTag(t.tagSet.Rcode, rcode.ToString(rc)) if err != nil { + // Use OpenTracing error handling otext.Error.Set(span, true) span.LogFields(otlog.Event("error"), otlog.Error(err)) } - - return status, err } diff --git a/plugin/trace/trace_test.go b/plugin/trace/trace_test.go index 28bdaf51c..ce78bfdc1 100644 --- a/plugin/trace/trace_test.go +++ b/plugin/trace/trace_test.go @@ -5,6 +5,7 @@ import ( "errors" "net/http" "net/http/httptest" + "strings" "testing" "github.com/coredns/caddy" @@ -15,9 +16,10 @@ import ( "github.com/coredns/coredns/plugin/test" "github.com/coredns/coredns/request" + "github.com/DataDog/dd-trace-go/v2/ddtrace/mocktracer" "github.com/miekg/dns" "github.com/opentracing/opentracing-go" - "github.com/opentracing/opentracing-go/mocktracer" + openTracingMock "github.com/opentracing/opentracing-go/mocktracer" ) func TestStartup(t *testing.T) { @@ -83,7 +85,7 @@ func TestTrace(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { w := dnstest.NewRecorder(&test.ResponseWriter{}) - m := mocktracer.New() + m := openTracingMock.New() tr := &trace{ Next: test.HandlerFunc(func(_ context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { if plugin.ClientWrite(tc.status) { @@ -93,9 +95,9 @@ func TestTrace(t *testing.T) { } return tc.status, tc.err }), - every: 1, - tracer: m, - tagSet: defaultTagSet, + every: 1, + zipkinTracer: m, + tagSet: defaultTagSet, } ctx := context.TODO() if _, err := tr.ServeDNS(ctx, w, tc.question); err != nil && tc.err == nil { @@ -138,7 +140,7 @@ func TestTrace(t *testing.T) { func TestTrace_DOH_TraceHeaderExtraction(t *testing.T) { w := dnstest.NewRecorder(&test.ResponseWriter{}) - m := mocktracer.New() + m := openTracingMock.New() tr := &trace{ Next: test.HandlerFunc(func(_ context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { if plugin.ClientWrite(dns.RcodeSuccess) { @@ -148,8 +150,8 @@ func TestTrace_DOH_TraceHeaderExtraction(t *testing.T) { } return dns.RcodeSuccess, nil }), - every: 1, - tracer: m, + every: 1, + zipkinTracer: m, } q := new(dns.Msg).SetQuestion("example.net.", dns.TypeA) @@ -166,9 +168,163 @@ func TestTrace_DOH_TraceHeaderExtraction(t *testing.T) { fs := m.FinishedSpans() rootCoreDNSspan := fs[1] - rootCoreDNSTraceID := rootCoreDNSspan.Context().(mocktracer.MockSpanContext).TraceID - outsideSpanTraceID := outsideSpan.Context().(mocktracer.MockSpanContext).TraceID + rootCoreDNSTraceID := rootCoreDNSspan.Context().(openTracingMock.MockSpanContext).TraceID + outsideSpanTraceID := outsideSpan.Context().(openTracingMock.MockSpanContext).TraceID if rootCoreDNSTraceID != outsideSpanTraceID { t.Errorf("Unexpected traceID: rootSpan.TraceID: want %v, got %v", rootCoreDNSTraceID, outsideSpanTraceID) } } + +func TestStartup_Datadog(t *testing.T) { + m, err := traceParse(caddy.NewTestController("dns", `trace datadog localhost:8126`)) + if err != nil { + t.Errorf("Error parsing test input: %s", err) + return + } + if m.Name() != "trace" { + t.Errorf("Wrong name from GetName: %s", m.Name()) + } + + // Test that we can start and stop the DataDog tracer without errors + err = m.OnStartup() + if err != nil { + t.Errorf("Error starting DataDog tracing plugin: %s", err) + return + } + + if m.tagSet != tagByProvider["datadog"] { + t.Errorf("TagSet for DataDog hasn't been correctly initialized") + } + + // Test shutdown + err = m.OnShutdown() + if err != nil { + t.Errorf("Error shutting down DataDog tracing plugin: %s", err) + } +} + +func TestTrace_DataDog(t *testing.T) { + // Test the complete DataDog tracing flow using mocktracer + mt := mocktracer.Start() + defer mt.Stop() + + cases := []struct { + name string + rcode int + status int + question *dns.Msg + err error + }{ + { + name: "NXDOMAIN", + rcode: dns.RcodeNameError, + status: dns.RcodeSuccess, + question: new(dns.Msg).SetQuestion("example.org.", dns.TypeA), + }, + { + name: "NOERROR", + rcode: dns.RcodeSuccess, + status: dns.RcodeSuccess, + question: new(dns.Msg).SetQuestion("example.net.", dns.TypeCNAME), + }, + { + name: "SERVFAIL with error", + rcode: dns.RcodeServerFailure, + status: dns.RcodeSuccess, + question: new(dns.Msg).SetQuestion("example.net.", dns.TypeA), + err: errors.New("test error"), + }, + } + + datadogTagSet := tagByProvider["datadog"] + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + // Reset spans for each test + mt.Reset() + + w := dnstest.NewRecorder(&test.ResponseWriter{}) + tr := &trace{ + Next: test.HandlerFunc(func(_ context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { + if plugin.ClientWrite(tc.status) { + m := new(dns.Msg) + m.SetRcode(r, tc.rcode) + w.WriteMsg(m) + } + return tc.status, tc.err + }), + every: 1, + EndpointType: "datadog", + tagSet: datadogTagSet, + } + + ctx := context.TODO() + if _, err := tr.ServeDNS(ctx, w, tc.question); err != nil && tc.err == nil { + t.Fatalf("Error during tr.ServeDNS(ctx, w, %v): %v", tc.question, err) + } + + spans := mt.FinishedSpans() + if len(spans) == 0 { + t.Fatal("Expected at least one span, got none") + } + + // Find the DNS span + var dnsSpan *mocktracer.Span + for _, span := range spans { + if span.OperationName() == defaultTopLevelSpanName { + dnsSpan = span + break + } + } + + if dnsSpan == nil { + t.Fatal("Could not find DNS span with operation name 'servedns'") + } + + req := request.Request{W: w, Req: tc.question} + + // Test DataDog-specific tags + if dnsSpan.Tag(datadogTagSet.Name) != req.Name() { + t.Errorf("Unexpected span tag: span.Tag(%v): want %v, got %v", + datadogTagSet.Name, req.Name(), dnsSpan.Tag(datadogTagSet.Name)) + } + if dnsSpan.Tag(datadogTagSet.Type) != req.Type() { + t.Errorf("Unexpected span tag: span.Tag(%v): want %v, got %v", + datadogTagSet.Type, req.Type(), dnsSpan.Tag(datadogTagSet.Type)) + } + if dnsSpan.Tag(datadogTagSet.Proto) != req.Proto() { + t.Errorf("Unexpected span tag: span.Tag(%v): want %v, got %v", + datadogTagSet.Proto, req.Proto(), dnsSpan.Tag(datadogTagSet.Proto)) + } + if dnsSpan.Tag(datadogTagSet.Remote) != req.IP() { + t.Errorf("Unexpected span tag: span.Tag(%v): want %v, got %v", + datadogTagSet.Remote, req.IP(), dnsSpan.Tag(datadogTagSet.Remote)) + } + if dnsSpan.Tag(datadogTagSet.Rcode) != rcode.ToString(tc.rcode) { + t.Errorf("Unexpected span tag: span.Tag(%v): want %v, got %v", + datadogTagSet.Rcode, rcode.ToString(tc.rcode), dnsSpan.Tag(datadogTagSet.Rcode)) + } + + // Test DataDog v2 error handling + if tc.err != nil { + errorMsg := dnsSpan.Tag("error.message") + if errorMsg == nil { + t.Error("Expected error.message tag to be set") + } else if !strings.Contains(errorMsg.(string), "test error") { + t.Errorf("Expected error.message to contain 'test error', got %v", errorMsg) + } + + // Check error type tag + errorType := dnsSpan.Tag("error.type") + if errorType == nil { + t.Error("Expected error.type tag to be set") + } + } + + // Verify trace ID exists (mocktracer uses uint64) + traceID := dnsSpan.TraceID() + if traceID == 0 { + t.Error("Expected non-zero trace ID") + } + }) + } +}