Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CECO-1153] APM e2e test #1549

Merged
merged 13 commits into from
Jan 10, 2025
3 changes: 2 additions & 1 deletion test/e2e/kind_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@ package e2e
import (
"context"
"fmt"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/provisioners"
"path/filepath"
"strconv"
"strings"
"testing"
"time"

"github.com/DataDog/datadog-agent/test/new-e2e/pkg/provisioners"

Comment on lines +20 to +21
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is due to my IDE settings; it is a no-op, and this kind_test.go will eventually be removed after the refactor.

"github.com/DataDog/datadog-agent/test/new-e2e/pkg/components"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/e2e"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/runner"
Expand Down
19 changes: 19 additions & 0 deletions test/e2e/manifests/new_manifests/apm/datadog-agent-apm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
apiVersion: datadoghq.com/v2alpha1
kind: DatadogAgent
metadata:
namespace: e2e-operator
labels:
agent.datadoghq.com/e2e-test: datadog-agent-apm
spec:
global:
kubelet:
tlsVerify: false
features:
apm:
enabled: true
hostPortConfig:
enabled: true
hostPort: 8126
unixDomainSocketConfig:
enabled: true
path: /var/run/datadog/apm.socket
80 changes: 80 additions & 0 deletions test/e2e/manifests/new_manifests/apm/tracegen-deploy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: tracegen-tribrid
namespace: e2e-operator
labels:
app: tracegen-tribrid
spec:
replicas: 1
selector:
matchLabels:
app: tracegen-tribrid
template:
metadata:
labels:
app: tracegen-tribrid
spec:
containers:
- name: tracegen-tcp-hostip
image: ghcr.io/datadog/apps-tracegen:main
env:
# IP of the node - the trace-agent listens on it when hostPort is enabled
- name: DD_AGENT_HOST
valueFrom:
fieldRef:
fieldPath: status.hostIP
- name: DD_SERVICE
value: "e2e-test-apm-hostip"
- name: DD_ENV
value: "e2e-operator"
resources:
requests:
memory: "32Mi"
cpu: "2m"
limits:
memory: "32Mi"
cpu: "10m"
- name: tracegen-tcp-agent-service
image: ghcr.io/datadog/apps-tracegen:main
env:
# Kubernetes service of the node Agent - enabled by default with the APM feature
# The service is created by the Datadog Operator following convention: <datadog-agent-name>-agent
- name: DD_AGENT_HOST
value: "datadog-agent-apm-agent"
- name: DD_SERVICE
value: "e2e-test-apm-agent-service"
- name: DD_ENV
value: "e2e-operator"
resources:
requests:
memory: "32Mi"
cpu: "2m"
limits:
memory: "32Mi"
cpu: "10m"
- name: tracegen-udp
image: ghcr.io/datadog/apps-tracegen:main
# Socket of the trace-agent
env:
- name: DD_TRACE_AGENT_URL
value: "unix:///var/run/datadog/apm.socket"
- name: DD_SERVICE
value: "e2e-test-apm-socket"
- name: DD_ENV
value: "e2e-operator"
resources:
requests:
memory: "32Mi"
cpu: "2m"
limits:
memory: "32Mi"
cpu: "10m"
volumeMounts:
- name: apmsocketpath
mountPath: /var/run/datadog/apm.socket
volumes:
- name: apmsocketpath
hostPath:
path: /var/run/datadog/apm.socket
type: Socket
66 changes: 64 additions & 2 deletions test/e2e/tests/k8s_suite/k8s_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,7 @@ func (s *k8sSuite) TestGenericK8s() {
utils.VerifyAgentPods(s.T(), c, common.NamespaceName, s.Env().KubernetesCluster.Client(), common.NodeAgentSelector+",agent.datadoghq.com/name=dda-autodiscovery")

// check agent pods for http check
agentPods, err := s.Env().KubernetesCluster.Client().CoreV1().Pods(common.NamespaceName).List(context.TODO(), metav1.ListOptions{LabelSelector: common.NodeAgentSelector + ",agent.datadoghq.com/name=dda-autodiscovery",
FieldSelector: "status.phase=Running"})
agentPods, err := s.Env().KubernetesCluster.Client().CoreV1().Pods(common.NamespaceName).List(context.TODO(), metav1.ListOptions{LabelSelector: common.NodeAgentSelector + ",agent.datadoghq.com/name=dda-autodiscovery"})
assert.NoError(c, err)

for _, pod := range agentPods.Items {
Expand Down Expand Up @@ -254,6 +253,59 @@ func (s *k8sSuite) TestGenericK8s() {
s.verifyAPILogs()
}, 900*time.Second, 15*time.Second, "could not valid logs collection in time")
})

s.T().Run("APM hostPort k8s service UDP works", func(t *testing.T) {
var apmAgentSelector = ",agent.datadoghq.com/name=datadog-agent-apm"

ddaConfigPath, err := common.GetAbsPath(filepath.Join(common.ManifestsPath, "apm", "datadog-agent-apm.yaml"))
assert.NoError(s.T(), err)

ddaOpts := []agentwithoperatorparams.Option{
agentwithoperatorparams.WithDDAConfig(agentwithoperatorparams.DDAConfig{
Name: "datadog-agent-apm",
YamlFilePath: ddaConfigPath,
}),
}
ddaOpts = append(ddaOpts, defaultDDAOpts...)

provisionerOptions := []provisioners.KubernetesProvisionerOption{
provisioners.WithTestName("e2e-operator-apm"),
provisioners.WithDDAOptions(ddaOpts...),
provisioners.WithYAMLWorkload(provisioners.YAMLWorkload{
Name: "tracegen-deploy",
Path: strings.Join([]string{common.ManifestsPath, "apm", "tracegen-deploy.yaml"}, "/"),
}),
provisioners.WithLocal(s.local),
}
provisionerOptions = append(provisionerOptions, defaultProvisionerOpts...)

// Deploy trace generator
s.UpdateEnv(provisioners.KubernetesProvisioner(provisionerOptions...))

// Verify traces collection on agent pod
s.EventuallyWithTf(func(c *assert.CollectT) {
// Verify tracegen deployment is running
utils.VerifyNumPodsForSelector(s.T(), c, common.NamespaceName, s.Env().KubernetesCluster.Client(), 1, "app=tracegen-tribrid")

// Verify agent pods are running
utils.VerifyAgentPods(s.T(), c, common.NamespaceName, s.Env().KubernetesCluster.Client(), common.NodeAgentSelector+apmAgentSelector)
agentPods, err := s.Env().KubernetesCluster.Client().CoreV1().Pods(common.NamespaceName).List(context.TODO(), metav1.ListOptions{LabelSelector: common.NodeAgentSelector + apmAgentSelector})
assert.NoError(c, err)

// This works because we have a single Agent pod (so located on same node as tracegen)
// Otherwise, we would need to deploy tracegen on the same node as the Agent pod / as a DaemonSet
for _, pod := range agentPods.Items {

output, _, err := s.Env().KubernetesCluster.KubernetesClient.PodExec(common.NamespaceName, pod.Name, "agent", []string{"agent", "status", "apm agent", "-j"})
assert.NoError(c, err)

utils.VerifyAgentTraces(c, output)
}

// Verify traces collection with API client
s.verifyAPITraces(c)
}, 600*time.Second, 15*time.Second, "could not validate traces on agent pod") // TODO: check duration
})
}

func (s *k8sSuite) verifyAPILogs() {
Expand All @@ -262,6 +314,16 @@ func (s *k8sSuite) verifyAPILogs() {
s.Assert().NotEmptyf(logs, fmt.Sprintf("Expected fake intake-ingested logs to not be empty: %s", err))
}

// verifyAPITraces checks that the fake intake has ingested at least one trace.
//
// Failures are recorded on c instead of on the suite's own assertions: this
// helper is called from inside an EventuallyWithTf polling closure, and only
// failures collected on c can be retried on the next tick. Asserting through
// s.Assert() would mark the whole test as failed on the first attempt where
// the traces have not reached the intake yet.
func (s *k8sSuite) verifyAPITraces(c *assert.CollectT) {
	traces, err := s.Env().FakeIntake.Client().GetTraces()
	if !assert.NoError(c, err) {
		// Without a successful query there is nothing meaningful to inspect.
		return
	}
	assert.NotEmpty(c, traces, "Expected fake intake-ingested traces to not be empty")

	// TODO: implement finer trace verification by checking tags
	// https://github.com/DataDog/datadog-agent/blob/271a3aa2b5ec9c00c3d845a048c652e4b21e6659/test/new-e2e/tests/containers/k8s_test.go#L1451
}

func (s *k8sSuite) verifyKSMCheck(c *assert.CollectT) {
metricNames, err := s.Env().FakeIntake.Client().GetMetricNames()
assert.NoError(c, err)
Expand Down
33 changes: 30 additions & 3 deletions test/e2e/tests/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ package utils
import (
"context"
"fmt"
"strconv"
"strings"
"testing"

"github.com/DataDog/datadog-operator/test/e2e/common"
"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeClient "k8s.io/client-go/kubernetes"
"strconv"
"strings"
"testing"
)

func VerifyOperator(t *testing.T, c *assert.CollectT, namespace string, k8sClient kubeClient.Interface) {
Expand Down Expand Up @@ -106,3 +107,29 @@ func VerifyAgentPodLogs(c *assert.CollectT, collectorOutput string) {
totalIntegrations := len(agentLogs)
assert.True(c, tailedIntegrations >= totalIntegrations*80/100, "Expected at least 80%% of integrations to be tailed, got %d/%d", tailedIntegrations, totalIntegrations)
}

// VerifyAgentTraces asserts that the agent status JSON in collectorOutput
// reports received traces for exactly the expected tracegen services.
//
// collectorOutput is parsed with common.ParseCollectorJson; the receiver
// entries are read from its "apmStats" -> "receiver" list. Every failure is
// recorded on c, so the function is safe to call from a polling closure:
// a missing or unexpectedly shaped section yields a failed assertion rather
// than a panic.
func VerifyAgentTraces(c *assert.CollectT, collectorOutput string) {
	apmAgentJson := common.ParseCollectorJson(collectorOutput)
	// The order of services in the Agent JSON output is not guaranteed.
	// We use a map to assert that we have received traces for all expected services.
	expectedServices := map[string]bool{
		"e2e-test-apm-hostip":        true,
		"e2e-test-apm-socket":        true,
		"e2e-test-apm-agent-service": true,
	}
	// Track found services
	foundServices := map[string]bool{}

	// Use checked type assertions throughout: the status payload may be nil or
	// lack these keys, and an unchecked assertion would panic. Indexing a nil
	// map is safe and simply produces an empty result here.
	apmStats, _ := apmAgentJson["apmStats"].(map[string]interface{})
	receivers, _ := apmStats["receiver"].([]interface{})
	for _, receiver := range receivers {
		entry, ok := receiver.(map[string]interface{})
		if !ok {
			continue
		}
		serviceName, ok := entry["Service"].(string)
		if !ok {
			continue
		}
		// A missing or non-numeric counter is treated as zero traces received,
		// which fails the assertion below with a readable message.
		tracesReceived, _ := entry["TracesReceived"].(float64)
		// Ensure we received at least one trace for the service
		assert.Greater(c, tracesReceived, float64(0), "Expected traces to be received for service %s", serviceName)
		// Mark the service as found
		foundServices[serviceName] = true
	}

	assert.Equal(c, expectedServices, foundServices, "The found services do not match the expected services")
}
Loading