diff --git a/.gitignore b/.gitignore
index 5f644de46..34f05fd81 100644
--- a/.gitignore
+++ b/.gitignore
@@ -89,6 +89,8 @@ docker/mongodb-kubernetes-tests/.test_identifiers*
 logs-debug/
 
 /ssdlc-report/*
+tools/mdbdebug/bin*
+tools/diffwatch/bin*
 
 .gocache/
 docs/**/log/*
diff --git a/controllers/operator/mongodbshardedcluster_controller.go b/controllers/operator/mongodbshardedcluster_controller.go
index fb98984ef..faec36ee2 100644
--- a/controllers/operator/mongodbshardedcluster_controller.go
+++ b/controllers/operator/mongodbshardedcluster_controller.go
@@ -2868,6 +2868,10 @@ func (r *ShardedClusterReconcileHelper) statefulsetLabels() map[string]string {
 	return merge.StringToStringMap(r.sc.Labels, r.sc.GetOwnerLabels())
 }
 
+func (r *ShardedClusterReconcileHelper) DesiredShardsConfiguration() map[int]*mdbv1.ShardedClusterComponentSpec {
+	return r.desiredShardsConfiguration
+}
+
 func (r *ShardedClusterReconcileHelper) ShardsMemberClustersMap() map[int][]multicluster.MemberCluster {
 	return r.shardsMemberClustersMap
 }
diff --git a/tools/diffwatch/Dockerfile b/tools/diffwatch/Dockerfile
new file mode 100644
index 000000000..8abca000c
--- /dev/null
+++ b/tools/diffwatch/Dockerfile
@@ -0,0 +1,15 @@
+FROM alpine:latest
+
+RUN apk add --update --no-cache python3 py3-pip && ln -sf python3 /usr/bin/python
+RUN apk add --no-cache bash tmux kubectl htop less fzf yq lnav
+RUN pip install tmuxp --break-system-packages
+
+# Create directory for lnav formats
+RUN mkdir -p /root/.lnav/formats/installed/
+
+COPY bin_linux/diffwatch /usr/local/bin/
+COPY lnav/*.json /root/.lnav/formats/installed/
+COPY retry_cmd.sh /usr/local/bin/
+RUN chmod +x /usr/local/bin/retry_cmd.sh
+
+CMD ["/bin/bash"]
diff --git a/tools/diffwatch/build.sh b/tools/diffwatch/build.sh
new file mode 100755
index 000000000..37f81f34c
--- /dev/null
+++ b/tools/diffwatch/build.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+set -Eeou pipefail
+
+script_name=$(readlink -f "${BASH_SOURCE[0]}")
+script_dir=$(dirname "${script_name}")
+
+pushd "${script_dir}" >/dev/null 2>&1
+mkdir -p bin bin_linux
+
+echo "Building diffwatch in $(pwd)"
+GOOS=linux GOARCH=amd64 go build -o bin_linux ./...
+go build -o bin ./...
+
+echo "Copying diffwatch from $(pwd) to ${PROJECT_DIR}/bin"
+cp bin/diffwatch "${PROJECT_DIR}"/bin
diff --git a/tools/diffwatch/build_docker.sh b/tools/diffwatch/build_docker.sh
new file mode 100755
index 000000000..00ec2c605
--- /dev/null
+++ b/tools/diffwatch/build_docker.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+set -Eeou pipefail
+
+export TAG=${TAG:-"latest"}
+
+docker build --platform linux/amd64 -t "quay.io/lsierant/diffwatch:${TAG}" .
+docker push "quay.io/lsierant/diffwatch:${TAG}" diff --git a/tools/diffwatch/cmd/diffwatch/diffwatch.go b/tools/diffwatch/cmd/diffwatch/diffwatch.go new file mode 100644 index 000000000..10dc2440a --- /dev/null +++ b/tools/diffwatch/cmd/diffwatch/diffwatch.go @@ -0,0 +1,105 @@ +package main + +import ( + "context" + "flag" + "fmt" + "io" + "log" + "os" + "os/signal" + "path" + "strings" + "syscall" + + "github.com/mongodb/mongodb-kubernetes/diffwatch/pkg/diffwatch" +) + +type arrayFlags []string + +func (i *arrayFlags) String() string { + return strings.Join(*i, ",") +} + +func (i *arrayFlags) Set(value string) error { + *i = append(*i, value) + return nil +} + +func main() { + ctx, cancel := context.WithCancel(context.Background()) + + signalChan := make(chan os.Signal, 1) + signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM) + + go func() { + <-signalChan + cancel() + }() + + readFromStdin := isInPipeMode() + var inputStream io.Reader + if readFromStdin { + inputStream = os.Stdin + } + + var filePath string + var destDir string + var linesAfter int + var linesBefore int + var linesContext int + var ignores arrayFlags + flag.StringVar(&filePath, "file", "", "Path to the JSON file that will be periodically observed for changes. Optional when the content is piped on stdin. Required when -destDir is specified. "+ + "If reading from stdin, then path is not relevant (file won't be read), but the file name will be used for the diff files prefix stored in destDir.") + flag.StringVar(&destDir, "destDir", "", "Path to the destination directory to store diffs. Optional. If not set, then diff files won't be created. "+ + "If specified, then -file parameter is required. The files will be prefixed with file name of the -file parameter.") + flag.IntVar(&linesAfter, "A", 0, "Number of lines printed after a match (default 0)") + flag.IntVar(&linesBefore, "B", 0, "Number of lines printed before a match (default 0)") + flag.IntVar(&linesContext, "C", 3, "Number of context lines printed before and after (equivalent to setting -A and -B) (default = 3)") + flag.Var(&ignores, "ignore", "Regex pattern to ignore triggering diff if the only changes are ignored ones; you can specify multiple --ignore parameters, e.g. --ignore timestamp --ignore '\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}Z' (ignore all lines with changed timestamp)") + flag.Parse() + + for ignore := range ignores { + fmt.Println("ignore = ", ignore) + } + + if linesBefore == 0 { + linesBefore = linesContext + } + if linesAfter == 0 { + linesAfter = linesContext + } + + if err := watchChanges(ctx, filePath, destDir, inputStream, linesBefore, linesAfter, ignores); err != nil { + cancel() + if err == io.EOF { + log.Printf("Reached end of stream. 
Exiting.") + } else { + log.Printf("Error: %v", err) + } + os.Exit(1) + } +} + +func isInPipeMode() bool { + stat, _ := os.Stdin.Stat() + return (stat.Mode() & os.ModeCharDevice) == 0 +} + +func watchChanges(ctx context.Context, filePath string, destDir string, inputStream io.Reader, linesBefore int, linesAfter int, ignores []string) error { + diffWriterFunc := diffwatch.WriteDiffFiles(destDir, path.Base(filePath)) + jsonDiffer, err := diffwatch.NewJsonDiffer(linesBefore, linesAfter, diffWriterFunc, ignores) + if err != nil { + return err + } + + // parsedFileChannel is filled in the background by reading from stream or watching the file periodically + parsedFileChannel := make(chan diffwatch.ParsedFileWrapper) + if inputStream != nil { + go diffwatch.ReadAndParseFromStream(ctx, inputStream, filePath, parsedFileChannel) + } else { + go diffwatch.ReadAndParseFilePeriodically(ctx, filePath, diffwatch.DefaultWatchInterval, parsedFileChannel) + } + + return diffwatch.WatchFileChangesPeriodically(ctx, filePath, parsedFileChannel, jsonDiffer.FileChangedHandler) +} diff --git a/tools/diffwatch/cmd/diffwatch/diffwatch_int_test.go b/tools/diffwatch/cmd/diffwatch/diffwatch_int_test.go new file mode 100644 index 000000000..48897fdba --- /dev/null +++ b/tools/diffwatch/cmd/diffwatch/diffwatch_int_test.go @@ -0,0 +1,120 @@ +package main + +import ( + "bytes" + "context" + "fmt" + "io" + "os" + "testing" + "time" + + "github.com/mongodb/mongodb-kubernetes/diffwatch/pkg/diffwatch" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// ignored project-root/tmp dir where test outputs will be stored +const tmpDir = "../../../../tmp/diffwatch" +const cleanupAfterTest = false + +// TestDiffWatcherFromFile is a manual test that triggers simulated sequence of changes. +// Intended for manual inspection of files. +// +// How to run: +// 1. Create ops-manager-kubernetes/tmp directory +// 2. Comment t.Skip and run the test +// 3. 
View latest files: +// find $(find tmp/diffwatch -d 1 -type d | sort -n | tail -n 1) -type f | sort -rV | fzf --preview 'cat {}' +func TestDiffWatcherFromFile(t *testing.T) { + t.Skip("Test intended to manual run, comment skip to run") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + require.NoError(t, os.MkdirAll(tmpDir, 0770)) + tempDir, err := os.MkdirTemp(tmpDir, time.Now().Format("20060102_150405")) + require.NoError(t, err) + defer func() { + if cleanupAfterTest { + _ = os.RemoveAll(tempDir) + } + }() + + watchedFile := fmt.Sprintf("%s/watched.json", tempDir) + go watchChanges(ctx, watchedFile, tempDir, nil, 4, 4, []string{"ignoredField", `\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}`}) + + diffwatch.DefaultWatchInterval = time.Millisecond * 100 + diffwatch.DefaultProgressInterval = time.Millisecond * 200 + applyChange(t, "resources/base.json", watchedFile) + applyChange(t, "resources/changed_1.json", watchedFile) + applyChange(t, "resources/changed_2.json", watchedFile) + time.Sleep(diffwatch.DefaultProgressInterval * 2) + applyChange(t, "resources/changed_3.json", watchedFile) + applyChange(t, "resources/changed_3_ignored_only.json", watchedFile) + applyChange(t, "resources/changed_3_ignored_only_2.json", watchedFile) + applyChange(t, "resources/changed_4_ignored_only_ts.json", watchedFile) + applyChange(t, "resources/changed_4_ignored_only_ts_and_other.json", watchedFile) + applyChange(t, "resources/changed_5.json", watchedFile) +} + +func TestDiffWatcherFromStream(t *testing.T) { + t.Skip("Test intended to manual run, comment skip to run") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + buf := bytes.Buffer{} + files := []string{ + "resources/base.json", + "resources/changed_1.json", + "resources/changed_2.json", + "resources/changed_3.json", + "resources/changed_4_ignored_only_ts.json", + "resources/changed_4_ignored_only_ts_and_other.json", + "resources/changed_5.json", + } + for _, file := range files { + fileBytes, err := os.ReadFile(file) + require.NoError(t, err) + buf.Write(fileBytes) + } + + require.NoError(t, os.MkdirAll(tmpDir, 0770)) + tempDir, err := os.MkdirTemp(tmpDir, time.Now().Format("20060102_150405")) + require.NoError(t, err) + defer func() { + if cleanupAfterTest { + _ = os.RemoveAll(tempDir) + } + }() + + watchedFile := "watched.file" + _ = watchChanges(ctx, watchedFile, tempDir, &buf, 2, 2, []string{"ignoredField", `\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}`}) + cancel() + time.Sleep(time.Second * 1) +} + +func applyChange(t *testing.T, srcFilePath string, dstFilePath string) { + assert.NoError(t, copyFile(srcFilePath, dstFilePath, 0660)) + time.Sleep(diffwatch.DefaultWatchInterval * 2) +} + +func copyFile(srcFilePath string, dstFilePath string, mode os.FileMode) error { + source, err := os.Open(srcFilePath) + if err != nil { + return err + } + defer func() { + _ = source.Close() + }() + + destination, err := os.OpenFile(dstFilePath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) + if err != nil { + return err + } + defer func() { + _ = destination.Close() + }() + + _, err = io.Copy(destination, source) + return err +} diff --git a/tools/diffwatch/cmd/diffwatch/resources/base.json b/tools/diffwatch/cmd/diffwatch/resources/base.json new file mode 100644 index 000000000..9733ea1b2 --- /dev/null +++ b/tools/diffwatch/cmd/diffwatch/resources/base.json @@ -0,0 +1,38 @@ +{ + "version": 1, + "processes": [ + { + "name": "om-backup-db-0-0", + "disabled": false, + "hostname": "om-backup-db-0-0-svc.lsierant-10.svc.cluster.local", 
+ "args2_6": { + "net": { + "port": 27017, + "tls": { + "certificateKeyFile": "/var/lib/mongodb-automation/secrets/certs/LFKN25MS7RP2OSSJM3ORWIGPUW7VHJ24MOYDC2IXP77ADT45OR3A", + "mode": "requireTLS" + } + }, + "replication": { + "replSetName": "om-backup-db" + }, + "storage": { + "dbPath": "/data" + }, + "systemLog": { + "destination": "file", + "logAppend": false, + "path": "/var/log/mongodb-mms-automation/mongodb.log" + } + }, + "featureCompatibilityVersion": "6.0", + "processType": "mongod", + "version": "6.0.5-ent", + "authSchemaVersion": 5, + "LogRotate": { + "timeThresholdHrs": 0, + "sizeThresholdMB": 0 + } + } + ] +} diff --git a/tools/diffwatch/cmd/diffwatch/resources/changed_1.json b/tools/diffwatch/cmd/diffwatch/resources/changed_1.json new file mode 100644 index 000000000..b38509d3d --- /dev/null +++ b/tools/diffwatch/cmd/diffwatch/resources/changed_1.json @@ -0,0 +1,44 @@ +{ + "version": 1, + "processes": [ + { + "name": "om-backup-db-0-0", + "disabled": true, + "hostname": "om-backup-db-0-0-svc.lsierant-10.svc.cluster.local", + "args2_6": { + "net": { + "port": 27017, + "tls": { + "certificateKeyFile": "/var/lib/mongodb-automation/secrets/certs/LFKN25MS7RP2OSSJM3ORWIGPUW7VHJ24MOYDC2IXP77ADT45OR3A", + "mode": "preferTLS" + } + }, + "replication": { + "replSetName": "om-backup-db" + }, + "storage": { + "dbPath": "/data" + }, + "systemLog": { + "destination": "file", + "logAppend": false, + "path": "/var/log/mongodb-mms-automation/mongodb.log" + } + }, + "featureCompatibilityVersion": "6.0", + "processType": "mongod", + "version": "6.0.6-ent", + "authSchemaVersion": 5, + "newField1": 1, + "newField2": 2, + "newField3": 3, + "newField4": 4, + "newField5": 5, + "LogRotate": { + "timeThresholdHrs": 0, + "sizeThresholdMB": 0 + }, + "newField6": 6 + } + ] +} diff --git a/tools/diffwatch/cmd/diffwatch/resources/changed_2.json b/tools/diffwatch/cmd/diffwatch/resources/changed_2.json new file mode 100644 index 000000000..969ced926 --- /dev/null +++ b/tools/diffwatch/cmd/diffwatch/resources/changed_2.json @@ -0,0 +1,45 @@ +{ + "version": 1, + "processes": [ + { + "name": "om-backup-db-0-0", + "disabled": true, + "ignoredField": 1, + "hostname": "om-backup-db-0-0-svc.lsierant-10.svc.cluster.local", + "args2_6": { + "net": { + "port": 27017, + "tls": { + "certificateKeyFile": "/var/lib/mongodb-automation/secrets/certs/LFKN25MS7RP2OSSJM3ORWIGPUW7VHJ24MOYDC2IXP77ADT45OR3A", + "mode": "preferTLS" + } + }, + "replication": { + "replSetName": "om-backup-db" + }, + "storage": { + "dbPath": "/data" + }, + "systemLog": { + "destination": "file", + "logAppend": false, + "path": "/var/log/mongodb-mms-automation/mongodb.log" + } + }, + "featureCompatibilityVersion": "6.0", + "processType": "mongod", + "version": "6.0.6-ent", + "authSchemaVersion": 5, + "newField1": 1, + "newField2": 2, + "newField3": 3, + "newField4": 4, + "newField5": 5, + "LogRotate": { + "timeThresholdHrs": 0, + "sizeThresholdMB": 0 + }, + "newField6": 6 + } + ] +} diff --git a/tools/diffwatch/cmd/diffwatch/resources/changed_3.json b/tools/diffwatch/cmd/diffwatch/resources/changed_3.json new file mode 100644 index 000000000..9946df6ac --- /dev/null +++ b/tools/diffwatch/cmd/diffwatch/resources/changed_3.json @@ -0,0 +1,46 @@ +{ + "version": 1, + "processes": [ + { + "name": "om-backup-db-0-0", + "disabled": true, + "ignoredField": 1, + "timestamp": "2024-07-16T11:11:48.275Z", + "hostname": "om-backup-db-0-0-svc.lsierant-10.svc.cluster.local", + "args2_6": { + "net": { + "port": 27017, + "tls": { + "certificateKeyFile": 
"/var/lib/mongodb-automation/secrets/certs/LFKN25MS7RP2OSSJM3ORWIGPUW7VHJ24MOYDC2IXP77ADT45OR3A", + "mode": "preferTLS" + } + }, + "replication": { + "replSetName": "om-backup-db" + }, + "storage": { + "dbPath": "/data" + }, + "systemLog": { + "destination": "file", + "logAppend": false, + "path": "/var/log/mongodb-mms-automation/mongodb.log" + } + }, + "featureCompatibilityVersion": "6.0", + "processType": "mongod", + "version": "6.0.6-ent", + "authSchemaVersion": 5, + "newField1": 1, + "newField2": 2, + "newField3": 3, + "newField4": 4, + "newField5": 6, + "LogRotate": { + "timeThresholdHrs": 0, + "sizeThresholdMB": 0 + }, + "newField6": 6 + } + ] +} diff --git a/tools/diffwatch/cmd/diffwatch/resources/changed_3_ignored_only.json b/tools/diffwatch/cmd/diffwatch/resources/changed_3_ignored_only.json new file mode 100644 index 000000000..a40e0f291 --- /dev/null +++ b/tools/diffwatch/cmd/diffwatch/resources/changed_3_ignored_only.json @@ -0,0 +1,46 @@ +{ + "version": 1, + "processes": [ + { + "name": "om-backup-db-0-0", + "disabled": true, + "ignoredField": 2, + "timestamp": "2024-07-16T11:11:48.275Z", + "hostname": "om-backup-db-0-0-svc.lsierant-10.svc.cluster.local", + "args2_6": { + "net": { + "port": 27017, + "tls": { + "certificateKeyFile": "/var/lib/mongodb-automation/secrets/certs/LFKN25MS7RP2OSSJM3ORWIGPUW7VHJ24MOYDC2IXP77ADT45OR3A", + "mode": "preferTLS" + } + }, + "replication": { + "replSetName": "om-backup-db" + }, + "storage": { + "dbPath": "/data" + }, + "systemLog": { + "destination": "file", + "logAppend": false, + "path": "/var/log/mongodb-mms-automation/mongodb.log" + } + }, + "featureCompatibilityVersion": "6.0", + "processType": "mongod", + "version": "6.0.6-ent", + "authSchemaVersion": 5, + "newField1": 1, + "newField2": 2, + "newField3": 3, + "newField4": 4, + "newField5": 6, + "LogRotate": { + "timeThresholdHrs": 0, + "sizeThresholdMB": 0 + }, + "newField6": 6 + } + ] +} diff --git a/tools/diffwatch/cmd/diffwatch/resources/changed_3_ignored_only_2.json b/tools/diffwatch/cmd/diffwatch/resources/changed_3_ignored_only_2.json new file mode 100644 index 000000000..036fbd894 --- /dev/null +++ b/tools/diffwatch/cmd/diffwatch/resources/changed_3_ignored_only_2.json @@ -0,0 +1,46 @@ +{ + "version": 1, + "processes": [ + { + "name": "om-backup-db-0-0", + "disabled": true, + "ignoredField": 3, + "timestamp": "2024-07-16T11:11:48.275Z", + "hostname": "om-backup-db-0-0-svc.lsierant-10.svc.cluster.local", + "args2_6": { + "net": { + "port": 27017, + "tls": { + "certificateKeyFile": "/var/lib/mongodb-automation/secrets/certs/LFKN25MS7RP2OSSJM3ORWIGPUW7VHJ24MOYDC2IXP77ADT45OR3A", + "mode": "preferTLS" + } + }, + "replication": { + "replSetName": "om-backup-db" + }, + "storage": { + "dbPath": "/data" + }, + "systemLog": { + "destination": "file", + "logAppend": false, + "path": "/var/log/mongodb-mms-automation/mongodb.log" + } + }, + "featureCompatibilityVersion": "6.0", + "processType": "mongod", + "version": "6.0.6-ent", + "authSchemaVersion": 5, + "newField1": 1, + "newField2": 2, + "newField3": 3, + "newField4": 4, + "newField5": 6, + "LogRotate": { + "timeThresholdHrs": 0, + "sizeThresholdMB": 0 + }, + "newField6": 6 + } + ] +} diff --git a/tools/diffwatch/cmd/diffwatch/resources/changed_4_ignored_only_ts.json b/tools/diffwatch/cmd/diffwatch/resources/changed_4_ignored_only_ts.json new file mode 100644 index 000000000..c024be7e2 --- /dev/null +++ b/tools/diffwatch/cmd/diffwatch/resources/changed_4_ignored_only_ts.json @@ -0,0 +1,46 @@ +{ + "version": 1, + "processes": [ + { + 
"name": "om-backup-db-0-0", + "disabled": true, + "ignoredField": 3, + "timestamp": "2024-07-16T11:12:48.275Z", + "hostname": "om-backup-db-0-0-svc.lsierant-10.svc.cluster.local", + "args2_6": { + "net": { + "port": 27017, + "tls": { + "certificateKeyFile": "/var/lib/mongodb-automation/secrets/certs/LFKN25MS7RP2OSSJM3ORWIGPUW7VHJ24MOYDC2IXP77ADT45OR3A", + "mode": "preferTLS" + } + }, + "replication": { + "replSetName": "om-backup-db" + }, + "storage": { + "dbPath": "/data" + }, + "systemLog": { + "destination": "file", + "logAppend": false, + "path": "/var/log/mongodb-mms-automation/mongodb.log" + } + }, + "featureCompatibilityVersion": "6.0", + "processType": "mongod", + "version": "6.0.6-ent", + "authSchemaVersion": 5, + "newField1": 1, + "newField2": 2, + "newField3": 3, + "newField4": 4, + "newField5": 6, + "LogRotate": { + "timeThresholdHrs": 0, + "sizeThresholdMB": 0 + }, + "newField6": 6 + } + ] +} diff --git a/tools/diffwatch/cmd/diffwatch/resources/changed_4_ignored_only_ts_and_other.json b/tools/diffwatch/cmd/diffwatch/resources/changed_4_ignored_only_ts_and_other.json new file mode 100644 index 000000000..b455bc3ba --- /dev/null +++ b/tools/diffwatch/cmd/diffwatch/resources/changed_4_ignored_only_ts_and_other.json @@ -0,0 +1,46 @@ +{ + "version": 1, + "processes": [ + { + "name": "om-backup-db-0-0", + "disabled": true, + "ignoredField": 4, + "timestamp": "2024-07-16T11:13:48.275Z", + "hostname": "om-backup-db-0-0-svc.lsierant-10.svc.cluster.local", + "args2_6": { + "net": { + "port": 27017, + "tls": { + "certificateKeyFile": "/var/lib/mongodb-automation/secrets/certs/LFKN25MS7RP2OSSJM3ORWIGPUW7VHJ24MOYDC2IXP77ADT45OR3A", + "mode": "preferTLS" + } + }, + "replication": { + "replSetName": "om-backup-db" + }, + "storage": { + "dbPath": "/data" + }, + "systemLog": { + "destination": "file", + "logAppend": false, + "path": "/var/log/mongodb-mms-automation/mongodb.log" + } + }, + "featureCompatibilityVersion": "6.0", + "processType": "mongod", + "version": "6.0.6-ent", + "authSchemaVersion": 5, + "newField1": 1, + "newField2": 2, + "newField3": 3, + "newField4": 4, + "newField5": 6, + "LogRotate": { + "timeThresholdHrs": 0, + "sizeThresholdMB": 0 + }, + "newField6": 6 + } + ] +} diff --git a/tools/diffwatch/cmd/diffwatch/resources/changed_5.json b/tools/diffwatch/cmd/diffwatch/resources/changed_5.json new file mode 100644 index 000000000..4e5a6b151 --- /dev/null +++ b/tools/diffwatch/cmd/diffwatch/resources/changed_5.json @@ -0,0 +1,45 @@ +{ + "version": 1, + "processes": [ + { + "name": "om-backup-db-0-0", + "disabled": true, + "ignoredField": 3, + "hostname": "om-backup-db-0-0-svc.lsierant-10.svc.cluster.local", + "args2_6": { + "net": { + "port": 30000, + "tls": { + "certificateKeyFile": "/var/lib/mongodb-automation/secrets/certs/LFKN25MS7RP2OSSJM3ORWIGPUW7VHJ24MOYDC2IXP77ADT45OR3A", + "mode": "requireTLS" + } + }, + "replication": { + "replSetName": "om-backup-db" + }, + "storage": { + "dbPath": "/data" + }, + "systemLog": { + "destination": "file", + "logAppend": false, + "path": "/var/log/mongodb-mms-automation/mongodb.log" + } + }, + "featureCompatibilityVersion": "6.0", + "processType": "mongod", + "version": "6.0.6-ent", + "authSchemaVersion": 5, + "newField1": 1, + "newField2": 2, + "newField3": 3, + "newField4": 4, + "newField5": 6, + "LogRotate": { + "timeThresholdHrs": 0, + "sizeThresholdMB": 0 + }, + "newField6": 6 + } + ] +} diff --git a/tools/diffwatch/cmd/textdiff/main.go b/tools/diffwatch/cmd/textdiff/main.go new file mode 100644 index 000000000..a2997e8e0 --- 
/dev/null +++ b/tools/diffwatch/cmd/textdiff/main.go @@ -0,0 +1,37 @@ +package main + +import ( + "fmt" + "github.com/sergi/go-diff/diffmatchpatch" +) + +const ( + text1 = `Lorem ipsum dolor. +Lorem ipsum dolor3asdasdasd +asdasdasd. +Lorem ipsum dolor.≤≤ +Lorem ipsum2 dolor. +Lorem ipsum dolor. +` + text2 = `Lorem ipsum dolor. +Lorem ipsum dolor. +Lorem ipsum dolor. +Lorem ipsum dolor3asdasdasd +asdasdasd. +Lorem ipsum dolor. +Lorem ipsum3 dolor. +Lorem ipsum dolor. +A +B +C +` +) + +func main() { + dmp := diffmatchpatch.New() + + diffs := dmp.DiffMain(text1, text2, false) + + //diff2 := dmp.DiffCharsToLines(diffs, strings.Split(text2, "\n")) + fmt.Println(dmp.DiffPrettyText(diffs)) +} diff --git a/tools/diffwatch/go.mod b/tools/diffwatch/go.mod new file mode 100644 index 000000000..41e0688ea --- /dev/null +++ b/tools/diffwatch/go.mod @@ -0,0 +1,20 @@ +module github.com/mongodb/mongodb-kubernetes/diffwatch + +go 1.21 + +require ( + github.com/stretchr/testify v1.5.1 + github.com/yudai/gojsondiff v1.0.0 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/onsi/ginkgo v1.16.5 // indirect + github.com/onsi/gomega v1.27.10 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/sergi/go-diff v1.3.1 // indirect + github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82 // indirect + github.com/yudai/pp v2.0.1+incompatible // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect +) diff --git a/tools/diffwatch/go.sum b/tools/diffwatch/go.sum new file mode 100644 index 000000000..8b90b9dca --- /dev/null +++ b/tools/diffwatch/go.sum @@ -0,0 +1,111 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= 
+github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= +github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= +github.com/onsi/gomega v1.27.10/go.mod h1:RsS8tutOdbdgzbPtzzATp12yT7kM5I5aElG3evPbQ0M= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8= +github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/yudai/gojsondiff v1.0.0 h1:27cbfqXLVEJ1o8I6v3y9lg8Ydm53EKqHXAOMxEGlCOA= +github.com/yudai/gojsondiff v1.0.0/go.mod h1:AY32+k2cwILAkW1fbgxQ5mUmMiZFgLIV+FBNExI05xg= +github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82 h1:BHyfKlQyqbsFN5p3IfnEUduWvb9is428/nNb5L3U01M= +github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82/go.mod h1:lgjkn3NuSvDfVJdfcVVdX+jpBxNmX4rDAzaS45IcYoM= +github.com/yudai/pp v2.0.1+incompatible h1:Q4//iY4pNF6yPLZIigmvcl7k/bPgrcTPIFIcmawg5bI= +github.com/yudai/pp v2.0.1+incompatible/go.mod h1:PuxR/8QJ7cyCkFp/aUDS+JY727OFEZkTdatxwunjIkc= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.12.0 h1:cfawfvKITfUsFCeJIHJrbSxpeu/E81khclypR0GVT50= +golang.org/x/net v0.12.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= +golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= 
+google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/tools/diffwatch/lnav/automation-agent.json b/tools/diffwatch/lnav/automation-agent.json new file mode 100644 index 000000000..07428f528 --- /dev/null +++ b/tools/diffwatch/lnav/automation-agent.json @@ -0,0 +1,60 @@ +{ + "mdbc_automation_agent_log": { + "title": "MongoDB Automation Agent Log", + "description": "Log format for MongoDB Database Kubernetes Automation Agent", + "url": "https://docs.mongodb.com/", + "regex": { + "std": { + "pattern": "^\\[(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}[+-]\\d{4})\\]\\s+\\[(?P[^\\]]+)\\]\\s+\\[(?P[^\\]]*)\\](?:\\s+<(?P[^>]*)>)?\\s*(?:\\[(?P[^\\]]*)\\]\\s*)?(?P.*)" + } + }, + "level-field": "level", + "level": { + "error": "error|Error|ERROR", + "warning": "warn|Warn|WARN|warning|Warning|WARNING", + "info": "info|Info|INFO|header\\.info", + "debug": "debug|Debug|DEBUG|\\.debug" + }, + "timestamp-field": "timestamp", + "timestamp-format": [ + "%Y-%m-%dT%H:%M:%S.%L%z" + ], + "value": { + "timestamp": { + "kind": "string", + "identifier": true + }, + "level": { + "kind": "string", + "identifier": true + }, + "source": { + "kind": "string", + "identifier": false + }, + "thread": { + "kind": "string", + "identifier": false + }, + "time_short": { + "kind": "string", + "identifier": false + }, + "body": { + "kind": "string", + "identifier": false + } + }, + "sample": [ + { + "line": "[2025-07-14T06:42:12.383+0000] [header.info] [::0] GitCommitId = ec1573c1fd5d7da3acab288d628b9e9eaaec6b2b" + }, + { + "line": "[2025-07-14T06:42:12.673+0000] [.debug] [src/main/cm.go:mainLoop:419] [06:42:12.673] Starting main loop iteration 17974 at 2025-07-14T06-42-12" + }, + { + "line": "[2025-07-14T06:42:12.748+0000] [.debug] [src/hosts/cacheddnsservice.go:lookup:64] [06:42:12.748] DNS cache hit on mdbc-rs-1.mdbc-rs-svc.ls.svc.cluster.local" + } + ] + } +} \ No newline at end of file diff --git a/tools/diffwatch/lnav/mongod.json b/tools/diffwatch/lnav/mongod.json new file mode 100644 index 
000000000..6e1c1924f --- /dev/null +++ b/tools/diffwatch/lnav/mongod.json @@ -0,0 +1,79 @@ +{ + "mongod_post44": { + "title": "MongoDB 4.4 Log format", + "description": "New log format beginning with MongoDB 4.4", + "url": "https://docs.mongodb.com/manual/reference/log-messages/index.html#structured-logging", + "json": true, + "file-pattern": ".*(mongod_container|mongodb|.*-mongod-container)\\.log", + "line-format": [ + { "field": "__timestamp__" }, + " ", + { "field": "s", "min-width": 1, "align": "left" }, + " ", + { "field": "c", "min-width": 8, "align": "left" }, + " [", + { + "field": "ctx", + "max-width": 20, + "overflow": "truncate", + "align": "left" + }, + "] id:", + { + "field": "id" + }, + " (", + { + "field": "attr/remote", + "max-width": 22, + "align": "left" + }, + ") ", + { "field": "msg" }, + " ", + { "field": "attr" }, + " ", + { "field": "tags", "default-value": "" }, + " ", + { "field": "truncated", "default-value": "" }, + " ", + { "field": "size", "default-value": "" } + ], + "timestamp-field": "t/$date", + "level-field": "s", + "body-field": "msg", + "opid-field": "ctx", + "level": { + "fatal": "F", + "error": "E", + "warning": "W", + "info": "I", + "debug": "D1", + "debug2": "D2", + "debug3": "D3", + "debug4": "D4", + "debug5": "D5" + }, + "value": { + "t": { "kind": "json", "hidden": true }, + "t/$date": { "kind": "string" }, + "s": { "kind": "string" }, + "c": { "kind": "string", "identifier": true }, + "id": { "kind": "integer", "identifier": true, "hidden": true }, + "ctx": { + "kind": "string", + "identifier": true + }, + "tags": { "kind": "json", "hidden": true }, + "msg": { "kind": "string" }, + "attr": { "kind": "json" }, + "attr/remote": { + "kind": "string", + "collate": "ipaddress", + "identifier": true + }, + "truncated": { "kind": "json" }, + "size": { "kind": "integer" } + } + } +} \ No newline at end of file diff --git a/tools/diffwatch/lnav/mongot.json b/tools/diffwatch/lnav/mongot.json new file mode 100644 index 000000000..cb5d9c0d8 --- /dev/null +++ b/tools/diffwatch/lnav/mongot.json @@ -0,0 +1,54 @@ +{ + "mongot_logs": { + "title": "MongoDB Search (mongot) Log Format", + "description": "Log format for MongoDB Search service (mongot)", + "url": "https://www.mongodb.com/docs/atlas/atlas-search/", + "json": true, + "file-pattern": "mongot.*\\.log|.*mongot.*\\.log", + "line-format": [ + { "field": "__timestamp__" }, + " ", + { "field": "s", "min-width": 5, "align": "left" }, + " ", + { "field": "svc", "min-width": 6, "align": "left" }, + " [", + { + "field": "ctx", + "max-width": 25, + "overflow": "truncate", + "align": "left" + }, + "] ", + { + "field": "n", + "max-width": 30, + "overflow": "truncate", + "align": "left" + }, + ": ", + { "field": "msg" }, + " ", + { "field": "attr", "default-value": "" } + ], + "timestamp-field": "t", + "level-field": "s", + "body-field": "msg", + "opid-field": "ctx", + "level": { + "error": "ERROR", + "warning": "WARN", + "info": "INFO", + "debug": "DEBUG", + "trace": "TRACE" + }, + "value": { + "t": { "kind": "string", "identifier": true }, + "s": { "kind": "string", "identifier": true }, + "svc": { "kind": "string", "identifier": true }, + "ctx": { "kind": "string", "identifier": true }, + "n": { "kind": "string", "identifier": true }, + "msg": { "kind": "string" }, + "attr": { "kind": "json" } + } + } +} diff --git a/tools/diffwatch/lnav/readiness-probe.json b/tools/diffwatch/lnav/readiness-probe.json new file mode 100644 index 000000000..c276a8c35 --- /dev/null +++ b/tools/diffwatch/lnav/readiness-probe.json @@ 
-0,0 +1,56 @@ +{ + "readiness_probe_log": { + "title": "MongoDB Readiness Probe Log", + "description": "JSON log format for MongoDB Kubernetes readiness probe", + "url": "https://docs.mongodb.com/", + "json": true, + "line-format": [ + { + "field": "ts" + }, + " ", + { + "field": "level", + "min-width": 5, + "align": "left" + }, + " ", + { + "field": "msg" + } + ], + "level-field": "level", + "level": { + "error": "error|Error|ERROR", + "warning": "warn|Warn|WARN|warning|Warning|WARNING", + "info": "info|Info|INFO", + "debug": "debug|Debug|DEBUG" + }, + "timestamp-field": "ts", + "timestamp-format": [ + "%Y-%m-%dT%H:%M:%S.%LZ" + ], + "value": { + "level": { + "kind": "string", + "identifier": true + }, + "ts": { + "kind": "string", + "identifier": true + }, + "msg": { + "kind": "string", + "identifier": false + } + }, + "sample": [ + { + "line": "{\"level\":\"info\",\"ts\":\"2025-07-17T15:58:37.842Z\",\"msg\":\"logging configuration: &{Filename:/var/log/mongodb-mms-automation/readiness.log MaxSize:5 MaxAge:0 MaxBackups:5 LocalTime:false Compress:false size:0 file: mu:{state:0 sema:0} millCh: startMill:{done:{_:{} v:0} m:{state:0 sema:0}}}\"}" + }, + { + "line": "{\"level\":\"debug\",\"ts\":\"2025-07-17T15:59:07.926Z\",\"msg\":\"Indicated a wait Step, status: WaitAllRsMembersUp, started at 2025-07-17T15:58:47Z but hasn't finished yet. Marking the probe as ready\"}" + } + ] + } +} \ No newline at end of file diff --git a/tools/diffwatch/pkg/diffwatch/diffwatch.go b/tools/diffwatch/pkg/diffwatch/diffwatch.go new file mode 100644 index 000000000..85098889a --- /dev/null +++ b/tools/diffwatch/pkg/diffwatch/diffwatch.go @@ -0,0 +1,104 @@ +package diffwatch + +import ( + "context" + "fmt" + "log" + "os" + "path" + "regexp" + "time" +) + +var coloredDiffRegexPattern = regexp.MustCompile(`^\x1b(\[[0-9;]*m)+([+-])(.*)$`) + +var DefaultWatchInterval = time.Second * 3 +var DefaultProgressInterval = time.Second * 10 + +// WatchFileChangesPeriodically receives json file events on parsedFileChannel and invokes jsonDiffer.FileChangedHandler with it +func WatchFileChangesPeriodically(ctx context.Context, filePath string, parsedFileChannel <-chan ParsedFileWrapper, fileChangedHandler func(ParsedFileWrapper) (bool, bool, error)) error { + progressMsgTicker := time.NewTicker(DefaultProgressInterval) + defer progressMsgTicker.Stop() + + watchingMsgFunc := func() { + fmt.Printf("%s Watching %s: ", time.Now().Format("2006/01/02 15:04:05"), filePath) + } + + watchingMsgProgressFunc := func() { + fmt.Printf(".") + } + watchingMsgProgressForIgnoredFunc := func() { + fmt.Printf("x") + } + + watchIntervalCallback := func(parsedFile ParsedFileWrapper) { + modified, shouldIgnoreDiff, err := fileChangedHandler(parsedFile) + if err != nil { + fmt.Println() + fmt.Printf("%v", err) + } + + if modified && !shouldIgnoreDiff { + watchingMsgFunc() + } + if shouldIgnoreDiff { + watchingMsgProgressForIgnoredFunc() + } + } + + watchingMsgFunc() + for { + select { + case parsedFile := <-parsedFileChannel: + if parsedFile.err != nil { + return parsedFile.err + } else { + watchIntervalCallback(parsedFile) + } + case <-ctx.Done(): + fmt.Println() + log.Println("exiting WatchFileChangesPeriodically routine") + return nil + case <-progressMsgTicker.C: + watchingMsgProgressFunc() + } + } +} + +func WriteDiffFiles(destDir, fileName string) WriteDiffFunc { + counter := 0 + return func(currentSourceFileString, fullDiffString, shortDiffString string, shouldIgnoreDiff bool) error { + if destDir != "" { + timestampStr := 
currentTimeStampString()
+            fullDiffFileName := fmt.Sprintf("%s/%s_diff_full_%s_%d%s", destDir, fileName, timestampStr, counter, path.Ext(fileName))
+            shortDiffFileName := fmt.Sprintf("%s/%s_diff_short_%s_%d%s", destDir, fileName, timestampStr, counter, path.Ext(fileName))
+            currentSourceFileName := fmt.Sprintf("%s/%s_%s_%d%s", destDir, fileName, timestampStr, counter, path.Ext(fileName))
+            counter++
+
+            if err := os.WriteFile(currentSourceFileName, []byte(currentSourceFileString), 0644); err != nil {
+                fmt.Println(err)
+            }
+
+            if err := os.WriteFile(fullDiffFileName, []byte(fullDiffString), 0644); err != nil {
+                fmt.Println(err)
+            }
+
+            if !shouldIgnoreDiff {
+                if err := os.WriteFile(shortDiffFileName, []byte(shortDiffString), 0644); err != nil {
+                    fmt.Println(err)
+                }
+            }
+        }
+
+        if !shouldIgnoreDiff {
+            fmt.Println()
+            fmt.Print(shortDiffString)
+        }
+
+        return nil
+    }
+}
+
+func currentTimeStampString() string {
+    return time.Now().Format("20060102_15_04_05")
+}
diff --git a/tools/diffwatch/pkg/diffwatch/file_watcher.go b/tools/diffwatch/pkg/diffwatch/file_watcher.go
new file mode 100644
index 000000000..f5d617c19
--- /dev/null
+++ b/tools/diffwatch/pkg/diffwatch/file_watcher.go
@@ -0,0 +1,62 @@
+package diffwatch
+
+import (
+    "context"
+    "encoding/json"
+    "fmt"
+    "log"
+    "os"
+    "time"
+)
+
+type ParsedFileWrapper struct {
+    name         string
+    content      string
+    contentAsMap map[string]interface{}
+    err          error
+}
+
+func tryToParseJsonFile(filePath string) (ParsedFileWrapper, error) {
+    fileAsMap := map[string]interface{}{}
+    var bytes []byte
+    var err error
+    for i := 0; i < 5; i++ {
+        bytes, err = os.ReadFile(filePath)
+        if err != nil {
+            time.Sleep(time.Millisecond * 10)
+            continue
+        }
+
+        if err = json.Unmarshal(bytes, &fileAsMap); err != nil {
+            time.Sleep(time.Millisecond * 10)
+            continue
+        }
+        break
+    }
+
+    if err != nil {
+        return ParsedFileWrapper{}, fmt.Errorf("error unmarshalling json: \n%s\nerror: %w", string(bytes), err)
+    }
+
+    return ParsedFileWrapper{name: filePath, content: string(bytes), contentAsMap: fileAsMap}, nil
+}
+
+// ReadAndParseFilePeriodically is a blocking function that parses filePath's content periodically, at watchInterval, and sends the result into parsedFileChannel.
+func ReadAndParseFilePeriodically(ctx context.Context, filePath string, watchInterval time.Duration, parsedFileChannel chan<- ParsedFileWrapper) {
+    watchTicker := time.NewTicker(watchInterval)
+    defer watchTicker.Stop()
+
+    for {
+        select {
+        case <-watchTicker.C:
+            if parsedFile, err := tryToParseJsonFile(filePath); err != nil {
+                parsedFileChannel <- ParsedFileWrapper{err: err}
+            } else {
+                parsedFileChannel <- parsedFile
+            }
+        case <-ctx.Done():
+            log.Println("exiting fileWatcher routine")
+            return
+        }
+    }
+}
diff --git a/tools/diffwatch/pkg/diffwatch/grep.go b/tools/diffwatch/pkg/diffwatch/grep.go
new file mode 100644
index 000000000..ee3099c71
--- /dev/null
+++ b/tools/diffwatch/pkg/diffwatch/grep.go
@@ -0,0 +1,116 @@
+package diffwatch
+
+import (
+    "fmt"
+    "regexp"
+    "slices"
+    "strings"
+)
+
+// Grep implements grepping str with the specified context window.
+// Similar to grep -A linesAfterMatch -B linesBeforeMatch.
+func Grep(str string, pattern *regexp.Regexp, linesBeforeMatch int, linesAfterMatch int, ignores []string) (string, bool) {
+    if len(str) == 0 {
+        return "", false
+    }
+    if linesBeforeMatch < 0 {
+        linesBeforeMatch = 0
+    }
+    if linesAfterMatch < 0 {
+        linesAfterMatch = 0
+    }
+
+    var ignoreRegex []*regexp.Regexp
+    for _, ignore := range ignores {
+        r, err := regexp.Compile(ignore)
+        if err != nil {
+            panic(fmt.Errorf("error compiling pattern: %s: %w", ignore, err))
+        }
+        ignoreRegex = append(ignoreRegex, r)
+    }
+
+    trailingNewline := str[len(str)-1] == '\n'
+
+    // contains the indexes where there is a match
+    var matchIndexes []int
+    var ignoredIndexes []int
+    indexesMatchedBy := map[int]map[*regexp.Regexp]struct{}{}
+    lines := strings.Split(str, "\n")
+    for idx := 0; idx < len(lines); idx++ {
+        if matches := pattern.FindStringSubmatch(lines[idx]); matches != nil {
+            matchIndexes = append(matchIndexes, idx)
+            if len(matches) < 3 {
+                continue
+            }
+
+            isRemoval := matches[2] == "-"
+            restOfTheLine := matches[3]
+            for _, r := range ignoreRegex {
+                if r.MatchString(restOfTheLine) {
+                    if isRemoval {
+                        if indexesMatchedBy[idx] == nil {
+                            indexesMatchedBy[idx] = map[*regexp.Regexp]struct{}{}
+                        }
+                        indexesMatchedBy[idx][r] = struct{}{}
+                    } else if previousLineMatchedRegexes, ok := indexesMatchedBy[idx-1]; ok {
+                        if _, ok := previousLineMatchedRegexes[r]; ok {
+                            // the previous line was a removal matched by the same regex, so this is a
+                            // consecutive removal/addition pair with the same match and can be ignored
+                            ignoredIndexes = append(ignoredIndexes, idx-1, idx)
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    shouldIgnoreDiff := slices.Equal(matchIndexes, ignoredIndexes)
+
+    if len(matchIndexes) == 0 {
+        return "", shouldIgnoreDiff
+    }
+
+    printSeparator := false
+    strBuilder := strings.Builder{}
+    currentMatchIndex := 0
+    for idx := 0; idx < len(lines); idx++ {
+        if idx > matchIndexes[currentMatchIndex]+linesAfterMatch {
+            // if we're outside the print window of the current match, we jump to the next match index
+            if currentMatchIndex < len(matchIndexes)-1 {
+                currentMatchIndex++
+            } else {
+                // nothing more will be printed because there are no other matches
+                break
+            }
+        } else if currentMatchIndex < len(matchIndexes)-1 && idx == matchIndexes[currentMatchIndex+1] {
+            // move the current match if we're at the index where there is another match
+            currentMatchIndex++
+        }
+
+        printLine := false
+        // are we in the "before" window?
+        if idx <= matchIndexes[currentMatchIndex] && idx >= (matchIndexes[currentMatchIndex]-linesBeforeMatch) {
+            printLine = true
+        }
+        // are we in the "after" window?
+        if idx >= matchIndexes[currentMatchIndex] && idx <= (matchIndexes[currentMatchIndex]+linesAfterMatch) {
+            printLine = true
+        }
+
+        if printLine {
+            strBuilder.WriteString(lines[idx])
+            if idx != len(lines)-1 || trailingNewline {
+                strBuilder.WriteString("\n")
+            }
+            // trigger printing the match separator only after a match
+            printSeparator = true
+        } else if (linesBeforeMatch > 0 || linesAfterMatch > 0) && printSeparator {
+            // we left the context window and there may be more matches, so emit the separator
+            strBuilder.WriteString("--\n")
+            // print the separator once; it will be enabled again when another match is found
+            printSeparator = false
+        }
+    }
+
+    return strBuilder.String(), shouldIgnoreDiff
+}
diff --git a/tools/diffwatch/pkg/diffwatch/grep_test.go b/tools/diffwatch/pkg/diffwatch/grep_test.go
new file mode 100644
index 000000000..5556c58ed
--- /dev/null
+++ b/tools/diffwatch/pkg/diffwatch/grep_test.go
@@ -0,0
+1,140 @@ +package diffwatch + +import ( + "os" + "regexp" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGrepWithBeforeAndAfter(t *testing.T) { + input := + `0 x +1 y +2 y +3 b +4 b +5 y +6 y +7 x +8 y +9 y +10 b` + expectedOutput := + `1 y +2 y +3 b +4 b +5 y +6 y +-- +8 y +9 y +10 b` + + actualOutput, _ := Grep(input, regexp.MustCompile("b"), 2, 2, nil) + assert.Equal(t, expectedOutput, actualOutput) +} + +func TestGrepWithoutBeforeAndAfter(t *testing.T) { + input := + `0 x +1 x +2 x +3 b +4 b +5 x +6 x +7 x +8 x +9 x +10 b` + + // no separators when no linesBefore or after + expectedOutput := + `3 b +4 b +10 b` + + actualOutput, _ := Grep(input, regexp.MustCompile("b"), 0, 0, nil) + assert.Equal(t, expectedOutput, actualOutput) +} + +func TestGrepWithoutBeforeAndWithAfter(t *testing.T) { + input := + `0 x +1 b +2 y +3 y +4 y` + expectedOutput := + `1 b +2 y +3 y +4 y` + + actualOutput, _ := Grep(input, regexp.MustCompile("b"), 0, 100, nil) + assert.Equal(t, expectedOutput, actualOutput) +} + +func TestGrepNoMatches(t *testing.T) { + input := + `0 x +1 b +4 y` + expectedOutput := "" + + actualOutput, _ := Grep(input, regexp.MustCompile("X"), 0, 100, nil) + assert.Equal(t, expectedOutput, actualOutput) +} + +func TestGrepAllMatches(t *testing.T) { + input := + `0 x +1 b +4 y` + expectedOutput := input + + actualOutput, _ := Grep(input, regexp.MustCompile("\\d\\s([xby])"), 0, 0, nil) + assert.Equal(t, expectedOutput, actualOutput) +} + +func TestGrepWithColorCodes(t *testing.T) { + inputBytes, err := os.ReadFile("resources/json_with_colors.json") + require.NoError(t, err) + expectedOutputBytes, err := os.ReadFile("resources/json_with_colors_expected_output.json") + require.NoError(t, err) + actualOutput, _ := Grep(string(inputBytes), coloredDiffRegexPattern, 1, 1, nil) + assert.Equal(t, string(expectedOutputBytes), actualOutput) +} + +func TestGrepWithColorAndIgnores(t *testing.T) { + inputBytes, err := os.ReadFile("resources/json_with_colors_ignored_only.json") + require.NoError(t, err) + expectedOutputBytes, err := os.ReadFile("resources/json_with_colors_ignored_only_expected_output.json") + require.NoError(t, err) + t.Run("check diff without ignores", func(t *testing.T) { + actualOutput, actualShouldIgnoreDiff := Grep(string(inputBytes), coloredDiffRegexPattern, 1, 1, nil) + assert.Equal(t, string(expectedOutputBytes), actualOutput) + assert.False(t, actualShouldIgnoreDiff) + }) + t.Run("check diff with incomplete ignores, so full diff is triggered", func(t *testing.T) { + actualOutput, actualShouldIgnoreDiff := Grep(string(inputBytes), coloredDiffRegexPattern, 1, 1, []string{"mode", "disabled"}) + assert.Equal(t, string(expectedOutputBytes), actualOutput) + assert.False(t, actualShouldIgnoreDiff) + }) + + t.Run("check diff with all ignores matched, diff should be ignored", func(t *testing.T) { + actualOutput, actualShouldIgnoreDiff := Grep(string(inputBytes), coloredDiffRegexPattern, 1, 1, []string{"mode", "disabled", "timestamp"}) + assert.Equal(t, string(expectedOutputBytes), actualOutput) + assert.True(t, actualShouldIgnoreDiff) + }) + + t.Run("check diff with all ignores including timestamp regex matches, diff should be ignored", func(t *testing.T) { + actualOutput, actualShouldIgnoreDiff := Grep(string(inputBytes), coloredDiffRegexPattern, 1, 1, []string{"mode", "disabled", `\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}`}) + assert.Equal(t, string(expectedOutputBytes), actualOutput) + assert.True(t, actualShouldIgnoreDiff) + }) 
+}
diff --git a/tools/diffwatch/pkg/diffwatch/json_differ.go b/tools/diffwatch/pkg/diffwatch/json_differ.go
new file mode 100644
index 000000000..9462145bc
--- /dev/null
+++ b/tools/diffwatch/pkg/diffwatch/json_differ.go
@@ -0,0 +1,75 @@
+package diffwatch
+
+import (
+    "fmt"
+    "github.com/yudai/gojsondiff"
+    "github.com/yudai/gojsondiff/formatter"
+)
+
+// JsonDiffer is a stateful object that maintains the latest json object and performs diffs against it.
+type JsonDiffer struct {
+    // previous json
+    oldFileMap map[string]interface{}
+    // like grep -B
+    linesBefore int
+    // like grep -A
+    linesAfter int
+    // function called when there are diffs to be saved
+    diffWriter WriteDiffFunc
+    ignores    []string
+}
+
+type WriteDiffFunc func(string, string, string, bool) error
+
+func NewJsonDiffer(linesBefore int, linesAfter int, diffWriter WriteDiffFunc, ignores []string) (*JsonDiffer, error) {
+    return &JsonDiffer{
+        linesAfter:  linesAfter,
+        linesBefore: linesBefore,
+        ignores:     ignores,
+        oldFileMap:  map[string]interface{}{},
+        diffWriter:  diffWriter,
+    }, nil
+}
+
+// FileChangedHandler is invoked with the parsed file content; it returns (modified, shouldIgnoreDiff, error).
+// The file doesn't necessarily contain different content.
+// If nothing changed (e.g. when called by the periodic file watcher), nothing happens.
+func (j *JsonDiffer) FileChangedHandler(parsedFile ParsedFileWrapper) (bool, bool, error) {
+    if parsedFile.err != nil {
+        return false, false, parsedFile.err
+    }
+
+    config := formatter.AsciiFormatterConfig{
+        ShowArrayIndex: false,
+        Coloring:       true,
+    }
+
+    if len(parsedFile.content) == 0 {
+        return false, false, fmt.Errorf("empty file")
+    }
+
+    differ := gojsondiff.New()
+    diff := differ.CompareObjects(j.oldFileMap, parsedFile.contentAsMap)
+    if !diff.Modified() {
+        return false, false, nil
+    }
+    jsonFormatter := formatter.NewAsciiFormatter(j.oldFileMap, config)
+
+    diffString, err := jsonFormatter.Format(diff)
+    if err != nil {
+        // shouldn't happen
+        panic(err)
+    }
+
+    shortDiffString, shouldIgnoreDiff := Grep(diffString, coloredDiffRegexPattern, j.linesBefore, j.linesAfter, j.ignores)
+
+    if len(diffString) > 0 {
+        if err := j.diffWriter(parsedFile.content, diffString, shortDiffString, shouldIgnoreDiff); err != nil {
+            return false, false, err
+        }
+    }
+
+    j.oldFileMap = parsedFile.contentAsMap
+
+    return true, shouldIgnoreDiff, nil
+}
diff --git a/tools/diffwatch/pkg/diffwatch/resources/json_with_colors.json b/tools/diffwatch/pkg/diffwatch/resources/json_with_colors.json
new file mode 100644
index 000000000..a886838a8
--- /dev/null
+++ b/tools/diffwatch/pkg/diffwatch/resources/json_with_colors.json
@@ -0,0 +1,40 @@
+ {
+   "processes": [
+     {
+       "LogRotate": {
+         "sizeThresholdMB": 0,
+         "timeThresholdHrs": 0
+       },
+       "args2_6": {
+         "net": {
+           "port": 27017,
+           "tls": {
+             "certificateKeyFile": "/var/lib/mongodb-automation/secrets/certs/LFKN25MS7RP2OSSJM3ORWIGPUW7VHJ24MOYDC2IXP77ADT45OR3A",
+-            "mode": "requireTLS"
++            "mode": "preferTLS"
+           }
+         },
+         "replication": {
+           "replSetName": "om-backup-db"
+         },
+         "storage": {
+           "dbPath": "/data"
+         },
+         "systemLog": {
+           "destination": "file",
+           "logAppend": false,
+           "path": "/var/log/mongodb-mms-automation/mongodb.log"
+         }
+       },
+       "authSchemaVersion": 5,
+-      "disabled": false,
++      "disabled": true,
+       "featureCompatibilityVersion": "6.0",
+       "hostname": "om-backup-db-0-0-svc.lsierant-10.svc.cluster.local",
+       "name": "om-backup-db-0-0",
+       "processType": "mongod",
++      "newField1": 1
+     }
+   ],
+   "version": 1
+ }
diff --git
a/tools/diffwatch/pkg/diffwatch/resources/json_with_colors_expected_output.json b/tools/diffwatch/pkg/diffwatch/resources/json_with_colors_expected_output.json new file mode 100644 index 000000000..116d62fc2 --- /dev/null +++ b/tools/diffwatch/pkg/diffwatch/resources/json_with_colors_expected_output.json @@ -0,0 +1,13 @@ + "certificateKeyFile": "/var/lib/mongodb-automation/secrets/certs/LFKN25MS7RP2OSSJM3ORWIGPUW7VHJ24MOYDC2IXP77ADT45OR3A", +- "mode": "requireTLS" ++ "mode": "preferTLS" + } +-- + "authSchemaVersion": 5, +- "disabled": false, ++ "disabled": true, + "featureCompatibilityVersion": "6.0", +-- + "processType": "mongod", ++ "newField1": 1 + } diff --git a/tools/diffwatch/pkg/diffwatch/resources/json_with_colors_ignored_only.json b/tools/diffwatch/pkg/diffwatch/resources/json_with_colors_ignored_only.json new file mode 100644 index 000000000..9942343b1 --- /dev/null +++ b/tools/diffwatch/pkg/diffwatch/resources/json_with_colors_ignored_only.json @@ -0,0 +1,41 @@ + { + "processes": [ + { + "LogRotate": { + "sizeThresholdMB": 0, + "timeThresholdHrs": 0 + }, + "args2_6": { + "net": { + "port": 27017, + "tls": { + "certificateKeyFile": "/var/lib/mongodb-automation/secrets/certs/LFKN25MS7RP2OSSJM3ORWIGPUW7VHJ24MOYDC2IXP77ADT45OR3A", +- "mode": "requireTLS" ++ "mode": "preferTLS" + } + }, + "replication": { + "replSetName": "om-backup-db" + }, + "storage": { + "dbPath": "/data" + }, + "systemLog": { + "destination": "file", + "logAppend": false, + "path": "/var/log/mongodb-mms-automation/mongodb.log" + } + }, + "authSchemaVersion": 5, +- "disabled": false, ++ "disabled": true, + "featureCompatibilityVersion": "6.0", + "hostname": "om-backup-db-0-0-svc.lsierant-10.svc.cluster.local", + "name": "om-backup-db-0-0", + "processType": "mongod", +- "timestamp": "2024-07-16T12:34:50+02:00",, ++ "timestamp": "2024-07-18T12:35:50+01:00",, + } + ], + "version": 1 + } diff --git a/tools/diffwatch/pkg/diffwatch/resources/json_with_colors_ignored_only_expected_output.json b/tools/diffwatch/pkg/diffwatch/resources/json_with_colors_ignored_only_expected_output.json new file mode 100644 index 000000000..984360d6d --- /dev/null +++ b/tools/diffwatch/pkg/diffwatch/resources/json_with_colors_ignored_only_expected_output.json @@ -0,0 +1,14 @@ + "certificateKeyFile": "/var/lib/mongodb-automation/secrets/certs/LFKN25MS7RP2OSSJM3ORWIGPUW7VHJ24MOYDC2IXP77ADT45OR3A", +- "mode": "requireTLS" ++ "mode": "preferTLS" + } +-- + "authSchemaVersion": 5, +- "disabled": false, ++ "disabled": true, + "featureCompatibilityVersion": "6.0", +-- + "processType": "mongod", +- "timestamp": "2024-07-16T12:34:50+02:00",, ++ "timestamp": "2024-07-18T12:35:50+01:00",, + } diff --git a/tools/diffwatch/pkg/diffwatch/stream_watcher.go b/tools/diffwatch/pkg/diffwatch/stream_watcher.go new file mode 100644 index 000000000..39905e6f3 --- /dev/null +++ b/tools/diffwatch/pkg/diffwatch/stream_watcher.go @@ -0,0 +1,67 @@ +package diffwatch + +import ( + "context" + "encoding/json" + "io" + "log" +) + +func parseJsonObjectsFromStream(ctx context.Context, input io.Reader, name string, parsedObjectChan chan<- ParsedFileWrapper) { + jsonDecoder := json.NewDecoder(input) + for { + select { + case <-ctx.Done(): + log.Println("exiting json stream decoder routine") + return + default: + } + + mapObj := map[string]interface{}{} + var parsedFile ParsedFileWrapper + if err := jsonDecoder.Decode(&mapObj); err != nil { + parsedFile = ParsedFileWrapper{err: err} + } else { + contentBytes, err := json.MarshalIndent(mapObj, "", " ") + if err != nil { + 
panic(err) + } + parsedFile = ParsedFileWrapper{ + name: name, + content: string(contentBytes), + contentAsMap: mapObj, + } + } + + select { + case <-ctx.Done(): + log.Println("exiting json stream decoder routine") + return + case parsedObjectChan <- parsedFile: + continue + } + } +} + +func ReadAndParseFromStream(ctx context.Context, input io.Reader, name string, parsedFileChannel chan<- ParsedFileWrapper) { + parsedObjectChan := make(chan ParsedFileWrapper) + + // Because jsonDecoder.Decode is blocking without any means to cancel, we need to read json objects in + // a separate goroutine to let this function to exit when the context is canceled. + go parseJsonObjectsFromStream(ctx, input, name, parsedObjectChan) + + for { + select { + case parsedObject := <-parsedObjectChan: + select { + case <-ctx.Done(): + log.Println("exiting ReadAndParseFromStream routine 2") + case parsedFileChannel <- parsedObject: + continue + } + case <-ctx.Done(): + log.Println("exiting ReadAndParseFromStream routine") + return + } + } +} diff --git a/tools/diffwatch/pkg/diffwatch/stream_watcher_test.go b/tools/diffwatch/pkg/diffwatch/stream_watcher_test.go new file mode 100644 index 000000000..c9a32e2c3 --- /dev/null +++ b/tools/diffwatch/pkg/diffwatch/stream_watcher_test.go @@ -0,0 +1,130 @@ +package diffwatch + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "log" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// BlockingConcurrentBuffer is a test buffer wrapper that blocks Read operation +// if there is no available data to simulate os.Stdin behavior. +type BlockingConcurrentBuffer struct { + buf bytes.Buffer + mutex sync.Mutex + cond *sync.Cond + closed bool +} + +func (c *BlockingConcurrentBuffer) Write(p []byte) (n int, err error) { + c.cond.L.Lock() + defer c.cond.L.Unlock() + n, err = c.buf.Write(p) + fmt.Printf("Written: \n<%s>\n", string(p[:n])) + c.cond.Signal() + + return n, err +} + +func (c *BlockingConcurrentBuffer) Read(p []byte) (n int, err error) { + c.cond.L.Lock() + defer c.cond.L.Unlock() + if c.buf.Len() == 0 { + fmt.Printf("Empty buffer, waiting on read...\n") + c.cond.Wait() + } + n, err = c.buf.Read(p) + // if the buffer is not closed, we don't want to return EOF + // os.Stdin is not returning EOF on a lack of data and EOF breaks json.Decoder + if !c.closed && err == io.EOF { + err = nil + } + fmt.Printf("Read: \n<%s>\n", string(p[:n])) + return n, err +} + +func (c *BlockingConcurrentBuffer) Close() { + c.cond.L.Lock() + defer c.cond.L.Unlock() + c.closed = true +} + +func NewBlockingConcurrentBuffer() *BlockingConcurrentBuffer { + return &BlockingConcurrentBuffer{ + cond: sync.NewCond(&sync.Mutex{}), + } +} + +func TestReadAndParseFromStream(t *testing.T) { + ctx := context.Background() + objects := []map[string]interface{}{ + {"a": "1"}, + {"b": "2"}, + {"a": "3"}, + {"a": "4", + "b": map[string]interface{}{ + "c": map[string]interface{}{ + "d": []interface{}{"e", "f"}, + }, + }, + }, + } + + buf := NewBlockingConcurrentBuffer() + + // write the serialized data one by one, in chunks in the background + go func() { + for _, obj := range objects { + jsonBytes, err := json.MarshalIndent(obj, "", " ") + require.NoError(t, err) + // write in 2 chunks + if len(jsonBytes) > 1 { + _, err = buf.Write(jsonBytes[0 : len(jsonBytes)/2]) + _, err = buf.Write(jsonBytes[len(jsonBytes)/2:]) + } else { + _, err = buf.Write(jsonBytes) + } + + time.Sleep(time.Millisecond * 10) + require.NoError(t, err) + } + 
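// closing the buffer marks the end of input, so Read stops masking io.EOF and the decoder can observe end of stream
+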
buf.Close() + }() + + parsedFileChannel := make(chan ParsedFileWrapper) + // read from buf in the background and send parsed objects to parsedFileChannel + go ReadAndParseFromStream(ctx, buf, "test", parsedFileChannel) + + // timeout here is only to not hang + timeoutCtx, cancel := context.WithTimeout(ctx, time.Millisecond*500) + + defer cancel() + var parsedFiles []ParsedFileWrapper + +forLabel: + for { + select { + case parsedFile := <-parsedFileChannel: + assert.NoError(t, parsedFile.err, "error on file #%d: %w", len(parsedFiles), parsedFile.err) + parsedFiles = append(parsedFiles, parsedFile) + case <-timeoutCtx.Done(): + log.Println("timeout done") + buf.Close() + break forLabel + } + } + + assert.Len(t, parsedFiles, len(objects)) + + for i, parsedFile := range parsedFiles { + assert.Equal(t, objects[i], parsedFile.contentAsMap) + } +} diff --git a/tools/diffwatch/retry_cmd.sh b/tools/diffwatch/retry_cmd.sh new file mode 100644 index 000000000..7a2a05b0d --- /dev/null +++ b/tools/diffwatch/retry_cmd.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Function to retry a command with a configurable delay +# Usage: retry_cmd "command to execute" delay_seconds +retry_cmd() { + local cmd="$1" + local delay="${2:-3}" + + while true; do + eval "$cmd" + echo "Retrying..." + sleep "$delay" + done +} \ No newline at end of file diff --git a/tools/mdbdebug/Dockerfile b/tools/mdbdebug/Dockerfile new file mode 100644 index 000000000..8ad02b629 --- /dev/null +++ b/tools/mdbdebug/Dockerfile @@ -0,0 +1,14 @@ +FROM debian:bullseye-slim + +# Install dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + procps \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app +COPY bin_linux/mdbdebug /app/ +RUN chmod +x /app/mdbdebug + +ENTRYPOINT ["/app/mdbdebug"] diff --git a/tools/mdbdebug/attach.sh b/tools/mdbdebug/attach.sh new file mode 100755 index 000000000..0515cfee8 --- /dev/null +++ b/tools/mdbdebug/attach.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash + +# This script is for attaching to a previously created debugging pod. + +set -Eeou pipefail + +if ! 
which fzf &>/dev/null ; then
+  echo "you need to install fzf:"
+  echo "  brew install fzf"
+  exit 1
+fi
+
+cmd_file="${PROJECT_DIR}/.generated/mdb-debug.attach"
+
+parse_json_to_map() {
+  local json="$1"
+  declare -n map_ref="$2"
+
+  while IFS=$'\t' read -r key value; do
+    # shellcheck disable=SC2034
+    map_ref["${key}"]="${value}"
+  done < <(echo "${json}" | jq -r 'to_entries | .[] | "\(.key)\t\(.value)"')
+}
+
+print_map() {
+  declare -n map_ref="$1"
+  for key in "${!map_ref[@]}"; do
+    echo "${key}: ${map_ref[$key]}"
+  done
+}
+
+attach() {
+  commands="$(jq -r '.[] | "\(.shortName): debug pod \(.namespace)/\(.podName) (will attach to \(.debugPodName))"' < "${cmd_file}")"
+  short_name=$(echo "${commands}" | fzf -n 1 -d ':' | cut -d ':' -f1)
+  echo "Picked pod to debug: ${short_name}"
+
+  attach_json="$(jq -r ".[] | select(.shortName == \"${short_name}\")" < "${cmd_file}")"
+  declare -A attach_map
+  parse_json_to_map "${attach_json}" attach_map
+  echo "Details of the selected attach entry:"
+  print_map attach_map
+  debug_sts_name="${attach_map["debugStsName"]}"
+  debug_pod_name="${attach_map["debugPodName"]}"
+  debug_pod_context="${attach_map["debugPodContext"]}"
+  if [[ "${debug_pod_context}" == "__default" ]]; then
+    debug_pod_context="$(kubectl config current-context)"
+  fi
+  cmd="${attach_map["command"]}"
+  namespace="${attach_map["namespace"]}"
+
+  if [[ "${cmd}" == "" ]]; then
+    echo "Couldn't find the attach command among those listed below:"
+    cat "${cmd_file}"
+    echo
+    exit 1
+  fi
+
+  echo "Scaling statefulset ${debug_sts_name} to 1 replica"
+  kubectl --context "${debug_pod_context}" --namespace "${namespace}" scale statefulsets "${debug_sts_name}" --replicas=1
+  kubectl --context "${debug_pod_context}" --namespace "${namespace}" rollout status statefulset "${debug_sts_name}" --timeout=60s
+  kubectl --context "${debug_pod_context}" --namespace "${namespace}" -it exec "${debug_pod_name}" -- tmux attach
+}
+
+pick_deployment() {
+  configmaps=()
+  while IFS= read -r cm; do
+    configmaps+=("${cm}")
+  done < <(kubectl get configmaps --namespace "${NAMESPACE}" -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep "attach-commands")
+
+  config_map_to_deployment_name_regex='s/mdb-debug-attach-commands-\([a-z0-9.-]*\)/\1/g'
+  if [[ ${#configmaps[@]} == 0 ]]; then
+    echo "No attach commands config maps found!" >&2
+  elif [[ ${#configmaps[@]} == 1 ]]; then
+    echo "Found one config map with attach commands: ${configmaps[0]}" >&2
+    echo -n "${configmaps[0]}" | sed "${config_map_to_deployment_name_regex}"
+  else
+    echo "Found multiple config maps with attach commands: ${configmaps[*]}" >&2
+    deployments=$(echo -n "${configmaps[*]}" | tr ' ' '\n' | sed "${config_map_to_deployment_name_regex}")
+    picked_deployment=$(
+      # the attach commands are stored under the "commands" key of the config map (see createOrUpdateAttachCommandsCM)
+      fzf -d ' ' --header-first --layout=reverse --header "Pick deployment first:" <<< "${deployments}" \
+        --preview "kubectl get cm mdb-debug-attach-commands-{} --namespace ${NAMESPACE} -o jsonpath='{.data.commands}'" \
+    )
+    if [[ ${picked_deployment} != "" ]]; then
+      echo "${picked_deployment}"
+    fi
+  fi
+}
+
+deployment=$(pick_deployment)
+if [[ ${deployment} == "" ]]; then
+  echo "No deployment picked. Exiting."
+ exit 1 +fi + +echo "Picked deployment: ${deployment}" +kubectl get cm "mdb-debug-attach-commands-${deployment}" --namespace "${NAMESPACE}" -o jsonpath='{.data.commands}' >"${cmd_file}" +attach diff --git a/tools/mdbdebug/build.sh b/tools/mdbdebug/build.sh new file mode 100755 index 000000000..cf39133e7 --- /dev/null +++ b/tools/mdbdebug/build.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -Eeou pipefail + +script_name=$(readlink -f "${BASH_SOURCE[0]}") +script_dir=$(dirname "${script_name}") + +pushd "${script_dir}" >/dev/null 2>&1 +mkdir -p bin +mkdir -p bin_linux + +echo "Building mdbdebug from $(pwd) directory" +go build -o bin ./... + +echo "Building mdbdebug for linux from $(pwd) directory" +GOOS=linux GOARCH=amd64 go build -o bin_linux ./... + +echo "Copying mdbdebug from to ${PROJECT_DIR}/bin" +cp bin/mdbdebug "${PROJECT_DIR}/bin" + +echo "Copying attach.sh and watch.sh to ${PROJECT_DIR}/bin" +cp attach.sh "${PROJECT_DIR}/bin" + +popd >/dev/null diff --git a/tools/mdbdebug/build_docker.sh b/tools/mdbdebug/build_docker.sh new file mode 100755 index 000000000..a098e216f --- /dev/null +++ b/tools/mdbdebug/build_docker.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -Eeou pipefail + +# Default image name and tag +IMAGE_NAME=${IMAGE_NAME:-"mdbdebug"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +REGISTRY=${REGISTRY:-"quay.io/lsierant"} + +script_name=$(readlink -f "${BASH_SOURCE[0]}") +script_dir=$(dirname "${script_name}") + +# Change to the script directory +pushd "${script_dir}" >/dev/null 2>&1 + +./build.sh + +echo "Building Docker image ${IMAGE_NAME}:${IMAGE_TAG}" +if [ -z "${REGISTRY}" ]; then + docker build -t "${IMAGE_NAME}:${IMAGE_TAG}" . +else + docker build -t "${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}" . + + echo "Pushing Docker image ${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}" + docker push "${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}" +fi + +popd >/dev/null 2>&1 diff --git a/tools/mdbdebug/cmd/mdbdebug/debug.go b/tools/mdbdebug/cmd/mdbdebug/debug.go new file mode 100644 index 000000000..1212e9314 --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/debug.go @@ -0,0 +1,490 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + mdbv1 "github.com/mongodb/mongodb-kubernetes/api/v1/mdb" + omv1 "github.com/mongodb/mongodb-kubernetes/api/v1/om" + searchv1 "github.com/mongodb/mongodb-kubernetes/api/v1/search" + "github.com/mongodb/mongodb-kubernetes/controllers/om" + "github.com/mongodb/mongodb-kubernetes/controllers/operator" + "github.com/mongodb/mongodb-kubernetes/controllers/operator/secrets" + mdbcv1 "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/api/v1" + kubernetesClient "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/client" + "github.com/mongodb/mongodb-kubernetes/pkg/multicluster" + "github.com/mongodb/mongodb-kubernetes/pkg/util" + "go.uber.org/zap" + "golang.org/x/xerrors" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/rest" + "log" + "os" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + crLog "sigs.k8s.io/controller-runtime/pkg/log" + crZap "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/metrics/server" +) + +type flags struct { + operatorClusterName string + namespace string + 
operatorNamespace   string
+	typeParam           string
+	name                string
+	watch               bool
+	deployPods          bool
+	diffwatchImage      string
+	// viewLogDir is only used to render a tmux session for an already collected log directory
+	viewLogDir string
+}
+
+func debugCmd(ctx context.Context) error {
+	flags := flags{}
+	flagSet := flag.NewFlagSet("debug", flag.ExitOnError)
+	flagSet.StringVar(&flags.operatorClusterName, "context", "", "Operator context name")
+	flagSet.StringVar(&flags.namespace, "namespace", os.Getenv("NAMESPACE"), "Namespace of the resource, defaults to the NAMESPACE env var")
+	flagSet.StringVar(&flags.operatorNamespace, "operator-namespace", os.Getenv("NAMESPACE"), "Namespace of the operator, defaults to the NAMESPACE env var")
+	flagSet.StringVar(&flags.typeParam, "type", "", "Type of CRD: om, mdb, mdbc, mdbs. Optional if -watch is specified.")
+	flagSet.StringVar(&flags.name, "name", "", "Name of the resource")
+	flagSet.BoolVar(&flags.deployPods, "deployPods", false, "Specify if all debug pods should be deployed immediately (all debug statefulsets are scaled to 1). If not specified, the statefulsets are scaled to zero, so no debug pods are deployed.")
+	flagSet.BoolVar(&flags.watch, "watch", false, "Run in the operator mode, i.e. watch the resources for changes and deploy new debugging pods")
+	flagSet.StringVar(&flags.viewLogDir, "viewLogDir", "", "Log directory to render a tmux session for")
+	flagSet.StringVar(&flags.diffwatchImage, "diffwatchImage", "quay.io/lsierant/diffwatch:latest", "Docker image for the diffwatch container")
+	if err := flagSet.Parse(os.Args[1:]); err != nil {
+		return err
+	}
+
+	if !flags.watch && (flags.operatorClusterName == "" || flags.typeParam == "" || flags.name == "") {
+		fmt.Print(`mdbdebug deploys debug pods for real-time observability.
+
+This utility watches MongoDB, MongoDBOpsManager, MongoDBCommunity and MongoDBSearch resources and prepares the deployment of debug pods.
+A debug pod, spawned for every (or one specified) database/OM pod, then takes the role of monitoring that pod's resources (JSON files, automation config, etc.) for changes.
+Hook into debug pods with the attach.sh script.
+
+There are two modes of operation:
+ - In watch mode (-watch) it works as an operator: it watches all supported CRs and reacts to changes in the topology.
+   The operator mode essentially executes the single-resource mode on any resource change and figures out the necessary parameters automatically.
+ - In single-resource mode you must specify the exact details of the resource you want to debug: type (-type) and name (-name).
+   It executes once, deploys all the necessary debug resources, dumps the commands to attach to the debug pods, and exits.
+
+Examples:
+  $ mdbdebug -watch
+  $ mdbdebug -context kind-kind -type mdb -name my-replica-set
+
+`)
+		flagSet.Usage()
+		return xerrors.Errorf("missing arguments")
+	}
+
+	return debug(ctx, flags)
+}
+
+func getMemberClusters(operatorClusterName string, c client.Client, namespace string) ([]string, error) {
+	m := corev1.ConfigMap{}
+	err := c.Get(context.Background(), types.NamespacedName{Name: util.MemberListConfigMapName, Namespace: namespace}, &m)
+	if err != nil {
+		return []string{operatorClusterName}, err
+	}
+
+	members := []string{operatorClusterName}
+	for member := range m.Data {
+		members = append(members, member)
+	}
+
+	return members, nil
+}
+
+func debug(ctx context.Context, flags flags) error {
+	kubeConfigPath := LoadKubeConfigFilePath()
+	log.Printf("Creating k8s client from %s for context %s", kubeConfigPath, flags.operatorClusterName)
+
+	var operatorClusterMap map[string]client.Client
+	var operatorConfigMap map[string]*rest.Config
+
+	runningInCluster := false
+	if inClusterConfig, err := rest.InClusterConfig(); err == nil {
+		operatorClusterMap, operatorConfigMap, err = createOperatorClusterMapFromInClusterConfig(flags.operatorClusterName, inClusterConfig)
+		if err != nil {
+			return xerrors.Errorf("failed to initialize client from the in-cluster config: %w", err)
+		}
+		runningInCluster = true
+	} else {
+		operatorClusterMap, operatorConfigMap, err = createClusterMap([]string{flags.operatorClusterName}, kubeConfigPath)
+		if err != nil {
+			return xerrors.Errorf("failed to initialize client for the operator cluster %s from kubeconfig %s: %w", flags.operatorClusterName, kubeConfigPath, err)
+		}
+	}
+
+	operatorClient, ok := operatorClusterMap[flags.operatorClusterName]
+	if !ok {
+		return xerrors.Errorf("failed to initialize central cluster %s client", flags.operatorClusterName)
+	}
+
+	clusterMap := operatorClusterMap
+	if !runningInCluster {
+		clusterNames, err := getMemberClusters(flags.operatorClusterName, operatorClient, flags.operatorNamespace)
+		if err != nil {
+			if errors.IsNotFound(err) {
+				clusterNames = []string{flags.operatorClusterName}
+			} else {
+				return xerrors.Errorf("failed to get cluster names from the config map from cluster %s and namespace %s: %w", flags.operatorClusterName, flags.operatorNamespace, err)
+			}
+		}
+
+		clusterMap, _, err = createClusterMap(clusterNames, kubeConfigPath)
+		if err != nil {
+			return xerrors.Errorf("failed to initialize client map for cluster names %v from kubeconfig %s: %w", clusterNames, kubeConfigPath, err)
+		}
+	}
+
+	if flags.watch {
+		return deployDebugWithWatch(ctx, flags.operatorClusterName, operatorConfigMap, flags.typeParam, flags.namespace, flags.operatorNamespace, flags.name, clusterMap, flags.deployPods, flags.diffwatchImage)
+	} else {
+		return deployDebugWithoutWatch(ctx, flags.operatorClusterName, flags.typeParam, flags.namespace, flags.operatorNamespace, flags.name, clusterMap, flags.deployPods, flags.diffwatchImage)
+	}
+}
+
+func deployDebugWithWatch(ctx context.Context, operatorClusterName string, configMap map[string]*rest.Config, resourceType string, namespace string, operatorNamespace string, resourceName string, clusterMap map[string]client.Client, deployPods bool, diffwatchImage string) error {
+	crLog.SetLogger(crZap.New())
+
+	mgr, err := manager.New(configMap[operatorClusterName], manager.Options{
+		Scheme:                 CurrentScheme(),
+		Metrics:                server.Options{BindAddress: "0"},
+		HealthProbeBindAddress: "0",
+	})
+	if err != nil {
+		return xerrors.Errorf("cannot create manager: %w", err)
+	}
+
+	err = builder.ControllerManagedBy(mgr).
+		For(&mdbv1.MongoDB{}).
+		Watches(&mdbv1.MongoDB{}, &handler.EnqueueRequestForObject{}).
+		WithOptions(controller.Options{
+			MaxConcurrentReconciles: 5,
+		}).
+		Complete(newMongoDBReconciler(operatorClusterName, namespace, clusterMap, deployPods, diffwatchImage))
+	if err != nil {
+		return xerrors.Errorf("error building MongoDB controller: %w", err)
+	}
+
+	err = builder.ControllerManagedBy(mgr).
+		For(&omv1.MongoDBOpsManager{}).
+		Watches(&omv1.MongoDBOpsManager{}, &handler.EnqueueRequestForObject{}).
+		WithOptions(controller.Options{
+			MaxConcurrentReconciles: 5,
+		}).
+		Complete(newOpsManagerReconciler(operatorClusterName, namespace, clusterMap, deployPods, diffwatchImage))
+	if err != nil {
+		return xerrors.Errorf("error building MongoDBOpsManager controller: %w", err)
+	}
+
+	err = builder.ControllerManagedBy(mgr).
+		For(&mdbcv1.MongoDBCommunity{}).
+		Watches(&mdbcv1.MongoDBCommunity{}, &handler.EnqueueRequestForObject{}).
+		WithOptions(controller.Options{
+			MaxConcurrentReconciles: 5,
+		}).
+		Complete(newMongoDBCommunityReconciler(operatorClusterName, namespace, clusterMap[operatorClusterName], deployPods, diffwatchImage))
+	if err != nil {
+		return xerrors.Errorf("error building MongoDBCommunity controller: %w", err)
+	}
+
+	err = builder.ControllerManagedBy(mgr).
+		For(&searchv1.MongoDBSearch{}).
+		Watches(&searchv1.MongoDBSearch{}, &handler.EnqueueRequestForObject{}).
+		WithOptions(controller.Options{
+			MaxConcurrentReconciles: 5,
+		}).
+		Complete(newMongoDBSearchReconciler(operatorClusterName, namespace, clusterMap[operatorClusterName], deployPods, diffwatchImage))
+	if err != nil {
+		return xerrors.Errorf("error building MongoDBSearch controller: %w", err)
+	}
+
+	if err := mgr.Start(ctx); err != nil {
+		return xerrors.Errorf("error starting controller: %w", err)
+	}
+
+	return nil
+}
+
+// attachCommand describes how to attach to a single debug pod; the list of commands is
+// serialized into the attach-commands config map consumed by attach.sh.
+type attachCommand struct {
+	Command         string `json:"command,omitempty"`
+	ShortName       string `json:"shortName,omitempty"`
+	PodName         string `json:"podName,omitempty"`
+	DebugPodName    string `json:"debugPodName,omitempty"`
+	DebugStsName    string `json:"debugStsName,omitempty"`
+	ResourceType    string `json:"resourceType,omitempty"`
+	ResourceName    string `json:"resourceName,omitempty"`
+	OperatorContext string `json:"operatorContext,omitempty"`
+	DebugPodContext string `json:"debugPodContext,omitempty"`
+	Namespace       string `json:"namespace,omitempty"`
+}
+
+func createOrUpdateAttachCommandsCM(ctx context.Context, logger *zap.SugaredLogger, resourceNamespace string, resourceName string, resourceType string, attachCommands []attachCommand, operatorClient client.Client) error {
+	attachCommandsBytes, err := json.Marshal(attachCommands)
+	if err != nil {
+		return xerrors.Errorf("error marshalling attach commands: %w", err)
+	}
+	attachCommandsData := map[string]string{
+		"commands":     string(attachCommandsBytes),
+		"resourceType": resourceType,
+		"resourceName": resourceName,
+	}
+	attachCommandsCM := corev1.ConfigMap{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: resourceNamespace,
+			Name:      fmt.Sprintf("mdb-debug-attach-commands-%s-%s", resourceType, resourceName),
+			Labels:    mdbDebugLabels,
+		},
+		Data: attachCommandsData,
+	}
+	cmName := types.NamespacedName{Namespace: attachCommandsCM.ObjectMeta.Namespace, Name: attachCommandsCM.ObjectMeta.Name}
+	err = operatorClient.Get(ctx, cmName, &attachCommandsCM)
+	if err != nil && !errors.IsNotFound(err) {
+		return xerrors.Errorf("error getting attach commands config map %s: %w", attachCommandsCM.Name, err)
+	}
+
+	if errors.IsNotFound(err) {
+		if err := operatorClient.Create(ctx, &attachCommandsCM); err != nil {
+			return xerrors.Errorf("error creating attach commands config map %s: %w", attachCommandsCM.Name, err)
+		}
+	} else {
+		// the Get above overwrote Data with the cluster state; restore the freshly computed commands before updating
+		attachCommandsCM.Data = attachCommandsData
+		if err := operatorClient.Update(ctx, &attachCommandsCM); err != nil {
+			return xerrors.Errorf("error updating attach commands config map %s: %w", attachCommandsCM.Name, err)
+		}
+	}
+
+	logger.Debugf("Saved attach commands to %s config map:\n%s", attachCommandsCM.Name, attachCommandsData["commands"])
+	return nil
+}
+
+func deployDebugWithoutWatch(ctx context.Context, operatorClusterName string, resourceType string, namespace string, operatorNamespace string, resourceName string, clusterMap map[string]client.Client, deployPods bool, diffwatchImage string) error {
+	logger := zap.S()
+	var err error
+	var attachCommands []attachCommand
+	switch resourceType {
+	case "om":
+		attachCommands, err = debugOpsManager(ctx, clusterMap, operatorClusterName, namespace, resourceName, deployPods, diffwatchImage)
+	case "mdb":
+		attachCommands, err = debugMongoDB(ctx, clusterMap, operatorClusterName, namespace, resourceName, deployPods, diffwatchImage)
+	case "mdbc":
+		attachCommands, err = debugMongoDBCommunity(ctx, namespace, resourceName, operatorClusterName, kubernetesClient.NewClient(clusterMap[operatorClusterName]), deployPods, diffwatchImage)
+	case "mdbs":
+		attachCommands, err = debugMongoDBSearch(ctx, namespace, resourceName, operatorClusterName, kubernetesClient.NewClient(clusterMap[operatorClusterName]), deployPods, diffwatchImage)
+	default:
+		return xerrors.Errorf("unknown resource type %s (expected om, mdb, mdbc or mdbs)", resourceType)
+	}
+	if err != nil {
+		return err
+	}
+
+	return createOrUpdateAttachCommandsCM(ctx, logger, namespace, resourceName, resourceType, attachCommands, clusterMap[operatorClusterName])
+}
+
+func createKubectlAttachCommand(operatorClusterName string, memberClusterName string, namespace string, podName string, debugPodName string) string {
+	contextName := memberClusterName
+	if memberClusterName == multicluster.LegacyCentralClusterName {
+		contextName = operatorClusterName
+	}
+	return fmt.Sprintf(`kubectl --context %s --namespace %s -it exec %s -- tmux attach`, contextName, namespace, debugPodName)
+}
+
+func debugOpsManager(ctx context.Context, clusterMap map[string]client.Client, operatorClusterName, namespace, name string, deployPods bool, diffwatchImage string) ([]attachCommand, error) {
+	centralClusterClient := clusterMap[operatorClusterName]
+
+	opsManager := omv1.MongoDBOpsManager{}
+	if err := centralClusterClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, &opsManager); err != nil {
+		return nil, xerrors.Errorf("error getting resource %s/%s: %w", namespace, name, err)
+	}
+
+	appDB := opsManager.Spec.AppDB
+	commonController := operator.NewReconcileCommonController(ctx, centralClusterClient)
+
+	appDBReconciler, err := operator.NewAppDBReplicaSetReconciler(ctx, nil, "", appDB, commonController, nil, opsManager.Annotations, clusterMap, zap.S())
+	if err != nil {
+		return nil, err
+	}
+
+	var attachCommands []attachCommand
+	for _, memberCluster := range appDBReconciler.GetHealthyMemberClusters() {
+		fmt.Printf("appdb member cluster: %+v\n", memberCluster)
+		if err := createServiceAccountAndRoles(ctx, memberCluster.Client, namespace); err != nil {
+			return nil, xerrors.Errorf("failed to create service account and roles in cluster %s: %w", memberCluster.Name, err)
+		}
+
+		if appDBAttachCommands, err := debugAppDB(ctx, &opsManager, operatorClusterName, memberCluster, deployPods, diffwatchImage); err != nil {
+			return nil, xerrors.Errorf("failed to debug appdb %s/%s in cluster %s: %w", namespace, appDB.Name(), memberCluster.Name, err)
+		} else {
+			attachCommands = append(attachCommands, appDBAttachCommands...)
+ } + } + + centralClient := kubernetesClient.NewClient(centralClusterClient) + + opsManagerReconciler := operator.NewOpsManagerReconciler(ctx, centralClient, clusterMap, nil, "", "", nil, nil, nil) + + omReconcilerHelper, err := operator.NewOpsManagerReconcilerHelper(ctx, opsManagerReconciler, &opsManager, clusterMap, zap.S()) + if err != nil { + return nil, xerrors.Errorf("failed to create NewOpsManagerReconcilerHelper: %w", err) + } + + for _, memberCluster := range omReconcilerHelper.GetMemberClusters() { + fmt.Printf("om member cluster: %+v\n", memberCluster) + if err := createServiceAccountAndRoles(ctx, memberCluster.Client, namespace); err != nil { + return nil, xerrors.Errorf("failed to create service account and roles in cluster %s: %w", memberCluster.Name, err) + } + + if omAttachCommands, err := debugOM(ctx, &opsManager, *omReconcilerHelper, operatorClusterName, memberCluster, deployPods, diffwatchImage); err != nil { + return nil, xerrors.Errorf("failed to debug appdb %s/%s in cluster %s: %w", namespace, appDB.Name(), memberCluster.Name, err) + } else { + attachCommands = append(attachCommands, omAttachCommands...) + } + } + + return attachCommands, nil +} + +func debugMongoDB(ctx context.Context, clusterMap map[string]client.Client, operatorClusterName, namespace, name string, deployPods bool, diffwatchImage string) ([]attachCommand, error) { + centralClusterClient := clusterMap[operatorClusterName] + + mdb := mdbv1.MongoDB{} + if err := centralClusterClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, &mdb); err != nil { + return nil, xerrors.Errorf("error getting resource %s/%s", namespace, name) + } + + switch mdb.Spec.ResourceType { + case mdbv1.ShardedCluster: + if attachCommands, err := debugShardedCluster(ctx, &mdb, operatorClusterName, clusterMap, deployPods, diffwatchImage); err != nil { + return nil, err + } else { + return attachCommands, nil + } + case mdbv1.ReplicaSet: + if mdb.Spec.GetTopology() == "MultiCluster" { + if attachCommands, err := debugMultiReplicaSet(ctx, mdb.Namespace, mdb.Name, &mdb.Spec.DbCommonSpec, mdb.Annotations, operatorClusterName, clusterMap, deployPods, diffwatchImage); err != nil { + return nil, err + } else { + return attachCommands, nil + } + } else { + if attachCommands, err := debugReplicaSet(ctx, mdb.Namespace, mdb.Name, &mdb.Spec.DbCommonSpec, mdb.Annotations, mdb.Spec.Replicas(), operatorClusterName, clusterMap, deployPods, diffwatchImage); err != nil { + return nil, err + } else { + return attachCommands, nil + } + } + default: + panic("not implemented") + } +} + +func getHealthyMemberClusters(memberClusters []multicluster.MemberCluster) []multicluster.MemberCluster { + var result []multicluster.MemberCluster + for i := range memberClusters { + if memberClusters[i].Healthy { + result = append(result, memberClusters[i]) + } + } + + return result +} + +func debugShardedCluster(ctx context.Context, mdb *mdbv1.MongoDB, operatorClusterName string, clusterMap map[string]client.Client, deployPods bool, diffwatchImage string) ([]attachCommand, error) { + commonController := operator.NewReconcileCommonController(ctx, clusterMap[operatorClusterName]) + reconcilerHelper, err := operator.NewShardedClusterReconcilerHelper(ctx, commonController, nil, "", "", true, false, mdb, clusterMap, om.NewOpsManagerConnection, zap.S()) + if err != nil { + return nil, err + } + var allAttachCommands []attachCommand + for _, memberCluster := range getHealthyMemberClusters(reconcilerHelper.MongosMemberClusters()) { + if err := 
createServiceAccountAndRoles(ctx, memberCluster.Client, mdb.Namespace); err != nil {
+			return nil, xerrors.Errorf("failed to create service account and roles in cluster %s: %w", memberCluster.Name, err)
+		}
+
+		if attachCommands, err := debugMongos(ctx, mdb, operatorClusterName, reconcilerHelper, memberCluster, deployPods, diffwatchImage); err != nil {
+			return nil, xerrors.Errorf("failed to debug MongoDB mongos %s/%s in cluster %s: %w", mdb.Namespace, mdb.Name, memberCluster.Name, err)
+		} else {
+			allAttachCommands = append(allAttachCommands, attachCommands...)
+		}
+	}
+
+	for _, memberCluster := range getHealthyMemberClusters(reconcilerHelper.ConfigSrvMemberClusters()) {
+		if err := createServiceAccountAndRoles(ctx, memberCluster.Client, mdb.Namespace); err != nil {
+			return nil, xerrors.Errorf("failed to create service account and roles in cluster %s: %w", memberCluster.Name, err)
+		}
+
+		if attachCommands, err := debugConfigServers(ctx, mdb, operatorClusterName, reconcilerHelper, memberCluster, deployPods, diffwatchImage); err != nil {
+			return nil, xerrors.Errorf("failed to debug MongoDB config servers %s/%s in cluster %s: %w", mdb.Namespace, mdb.Name, memberCluster.Name, err)
+		} else {
+			allAttachCommands = append(allAttachCommands, attachCommands...)
+		}
+	}
+
+	for shardIdx := 0; shardIdx < len(reconcilerHelper.DesiredShardsConfiguration()); shardIdx++ {
+		for _, memberCluster := range getHealthyMemberClusters(reconcilerHelper.ShardsMemberClustersMap()[shardIdx]) {
+			if err := createServiceAccountAndRoles(ctx, memberCluster.Client, mdb.Namespace); err != nil {
+				return nil, xerrors.Errorf("failed to create service account and roles in cluster %s: %w", memberCluster.Name, err)
+			}
+
+			if attachCommands, err := debugShardsServers(ctx, mdb, operatorClusterName, reconcilerHelper, shardIdx, memberCluster, deployPods, diffwatchImage); err != nil {
+				return nil, xerrors.Errorf("failed to debug MongoDB shard %d of %s/%s in cluster %s: %w", shardIdx, mdb.Namespace, mdb.Name, memberCluster.Name, err)
+			} else {
+				allAttachCommands = append(allAttachCommands, attachCommands...)
+			}
+		}
+	}
+
+	return allAttachCommands, nil
+}
+
+func debugReplicaSet(ctx context.Context, resourceNamespace string, resourceName string, mdb *mdbv1.DbCommonSpec, mdbAnnotations map[string]string, singleClusterMembers int, operatorClusterName string, clusterMap map[string]client.Client, deployPods bool, diffwatchImage string) ([]attachCommand, error) {
+	var attachCommands []attachCommand
+	for _, memberCluster := range getHealthyMemberClusters(getReplicaSetMemberClusters(mdb, singleClusterMembers, clusterMap, operatorClusterName)) {
+		if err := createServiceAccountAndRoles(ctx, memberCluster.Client, resourceNamespace); err != nil {
+			return nil, xerrors.Errorf("failed to create service account and roles in cluster %s: %w", memberCluster.Name, err)
+		}
+
+		if replicaSetAttachCommands, err := debugReplicaSetPods(ctx, resourceNamespace, resourceName, mdb, mdbAnnotations, operatorClusterName, memberCluster, deployPods, diffwatchImage); err != nil {
+			return nil, xerrors.Errorf("failed to debug MongoDB replica set %s/%s in cluster %s: %w", resourceNamespace, resourceName, memberCluster.Name, err)
+		} else {
+			attachCommands = append(attachCommands, replicaSetAttachCommands...)
+		}
+	}
+
+	return attachCommands, nil
+}
+
+func debugMultiReplicaSet(ctx context.Context, resourceNamespace string, resourceName string, dbCommonSpec *mdbv1.DbCommonSpec, mdbAnnotations map[string]string, operatorClusterName string, clusterMap map[string]client.Client, deployPods bool, diffwatchImage string) ([]attachCommand, error) {
+	var attachCommands []attachCommand
+	// FIXME singleClusterMembers
+	for _, memberCluster := range getHealthyMemberClusters(getReplicaSetMemberClusters(dbCommonSpec, 0, clusterMap, operatorClusterName)) {
+		if err := createServiceAccountAndRoles(ctx, memberCluster.Client, resourceNamespace); err != nil {
+			return nil, xerrors.Errorf("failed to create service account and roles in cluster %s: %w", memberCluster.Name, err)
+		}
+
+		if replicaSetAttachCommands, err := debugReplicaSetPods(ctx, resourceNamespace, resourceName, dbCommonSpec, mdbAnnotations, operatorClusterName, memberCluster, deployPods, diffwatchImage); err != nil {
+			return nil, xerrors.Errorf("failed to debug MongoDB multi-cluster replica set %s/%s in cluster %s: %w", resourceNamespace, resourceName, memberCluster.Name, err)
+		} else {
+			attachCommands = append(attachCommands, replicaSetAttachCommands...)
+		}
+	}
+
+	return attachCommands, nil
+}
+
+func getReplicaSetMemberClusters(mdb *mdbv1.DbCommonSpec, singleClusterMembers int, clusterMap map[string]client.Client, operatorClusterName string) []multicluster.MemberCluster {
+	if mdb.Topology != mdbv1.ClusterTopologyMultiCluster {
+		kubeClient := kubernetesClient.NewClient(clusterMap[operatorClusterName])
+		legacyCluster := multicluster.GetLegacyCentralMemberCluster(singleClusterMembers, 0, kubeClient, secrets.SecretClient{
+			VaultClient: nil,
+			KubeClient:  kubeClient,
+		})
+		return []multicluster.MemberCluster{legacyCluster}
+	}
+
+	// listing member clusters of a multi-cluster replica set is not supported here yet
+	panic("Not implemented")
+}
diff --git a/tools/mdbdebug/cmd/mdbdebug/debug_test.go b/tools/mdbdebug/cmd/mdbdebug/debug_test.go
new file mode 100644
index 000000000..5a006df7e
--- /dev/null
+++ b/tools/mdbdebug/cmd/mdbdebug/debug_test.go
@@ -0,0 +1,25 @@
+package main
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+)
+
+// TestDebug is a manual smoke test: it requires kubeconfig access to the cluster context hardcoded below.
+func TestDebug(t *testing.T) {
+	ctx := context.Background()
+	ctx, cancel := context.WithTimeout(ctx, time.Second*20)
+	defer cancel()
+
+	flags := flags{
+		operatorClusterName: "gke_scratch-kubernetes-team_europe-central2-a_k8s-mdb-0",
+		namespace:           "mongodb",
+		operatorNamespace:   "mongodb-operator",
+		typeParam:           "",
+		name:                "",
+		watch:               true,
+		deployPods:          false,
+	}
+	require.NoError(t, debug(ctx, flags))
+}
diff --git a/tools/mdbdebug/cmd/mdbdebug/kube_resources.go b/tools/mdbdebug/cmd/mdbdebug/kube_resources.go
new file mode 100644
index 000000000..84b630eeb
--- /dev/null
+++ b/tools/mdbdebug/cmd/mdbdebug/kube_resources.go
@@ -0,0 +1,112 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/mongodb/mongodb-kubernetes/pkg/multicluster"
+	"golang.org/x/xerrors"
+	corev1 "k8s.io/api/core/v1"
+	rbacv1 "k8s.io/api/rbac/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	restclient "k8s.io/client-go/rest"
+	"k8s.io/client-go/util/homedir"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+func LoadKubeConfigFilePath() string {
+	env := os.Getenv("KUBECONFIG")
+	if env != "" {
+		return env
+	}
+	return filepath.Join(homedir.HomeDir(), ".kube", "config")
+}
+
+func createClusterMap(clusterNames []string, kubeConfigPath string) (map[string]client.Client,
map[string]*restclient.Config, error) { + clusterMap := map[string]client.Client{} + configMap := map[string]*restclient.Config{} + + clusterClientMap, err := multicluster.CreateMemberClusterClients(clusterNames, kubeConfigPath) + if err != nil { + return nil, nil, xerrors.Errorf("failed to create k8s client from %s: %w", kubeConfigPath, err) + } + + for memberClusterName, restConfig := range clusterClientMap { + clientObj, err := client.New(restConfig, client.Options{ + Scheme: CurrentScheme(), + }) + if err != nil { + return nil, nil, xerrors.Errorf("failed to create k8s cluster object from %s for context %s: %w", kubeConfigPath, memberClusterName, err) + } + clusterMap[memberClusterName] = clientObj + configMap[memberClusterName] = restConfig + } + + return clusterMap, configMap, nil +} + +func createOperatorClusterMapFromInClusterConfig(operatorClusterName string, inClusterConfig *restclient.Config) (map[string]client.Client, map[string]*restclient.Config, error) { + clusterMap := map[string]client.Client{} + configMap := map[string]*restclient.Config{} + + clientObj, err := client.New(inClusterConfig, client.Options{ + Scheme: CurrentScheme(), + }) + if err != nil { + return nil, nil, xerrors.Errorf("failed to create in-cluster k8s client: %w", err) + } + + clusterMap[operatorClusterName] = clientObj + configMap[operatorClusterName] = inClusterConfig + + return clusterMap, configMap, nil +} + +func createServiceAccountAndRoles(ctx context.Context, kubeClient client.Client, namespace string) error { + sa := corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mdb-debug-sa-cluster-admin", + Namespace: namespace, + Labels: mdbDebugLabels, + }, + ImagePullSecrets: []corev1.LocalObjectReference{ + {Name: "image-registries-secret"}, + }, + } + + if err := kubeClient.Create(ctx, &sa); err != nil { + if !errors.IsAlreadyExists(err) { + return xerrors.Errorf("error creating service account: %w", err) + } + } + + roleBinding := rbacv1.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("mdb-debug-cluster-admin-%s", namespace), + Labels: mdbDebugLabels, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: "mdb-debug-sa-cluster-admin", + Namespace: sa.Namespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + Kind: "ClusterRole", + Name: "cluster-admin", + APIGroup: "rbac.authorization.k8s.io", + }, + } + + if err := kubeClient.Create(ctx, &roleBinding); err != nil { + if !errors.IsAlreadyExists(err) { + return xerrors.Errorf("error creating role binding: %w", err) + } + } + + return nil +} diff --git a/tools/mdbdebug/cmd/mdbdebug/mdbdebug.go b/tools/mdbdebug/cmd/mdbdebug/mdbdebug.go new file mode 100644 index 000000000..bab38e914 --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/mdbdebug.go @@ -0,0 +1,51 @@ +package main + +import ( + "context" + "fmt" + mdbv1 "github.com/mongodb/mongodb-kubernetes/api/v1/mdb" + omv1 "github.com/mongodb/mongodb-kubernetes/api/v1/om" + mdbcv1 "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/api/v1" + "os" + "os/signal" + "syscall" + + apiv1 "github.com/mongodb/mongodb-kubernetes/api/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" +) + +var scheme = runtime.NewScheme() + +func init() { + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(apiv1.AddToScheme(scheme)) + utilruntime.Must(corev1.AddToScheme(scheme)) + utilruntime.Must(mdbv1.AddToScheme(scheme)) + 
utilruntime.Must(omv1.AddToScheme(scheme)) + utilruntime.Must(mdbcv1.AddToScheme(scheme)) +} + +func CurrentScheme() *runtime.Scheme { + return scheme +} + +func main() { + ctx, cancel := context.WithCancel(context.Background()) + + signalChan := make(chan os.Signal, 1) + signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM) + + go func() { + <-signalChan + cancel() + }() + + err := debugCmd(ctx) + if err != nil { + fmt.Printf("%+v\n", err) + os.Exit(1) + } +} diff --git a/tools/mdbdebug/cmd/mdbdebug/mongodb.go b/tools/mdbdebug/cmd/mdbdebug/mongodb.go new file mode 100644 index 000000000..da70df0c8 --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/mongodb.go @@ -0,0 +1,385 @@ +package main + +import ( + "context" + "fmt" + mdbv1 "github.com/mongodb/mongodb-kubernetes/api/v1/mdb" + "github.com/mongodb/mongodb-kubernetes/controllers/operator" + "github.com/mongodb/mongodb-kubernetes/pkg/dns" + "github.com/mongodb/mongodb-kubernetes/pkg/multicluster" + "github.com/mongodb/mongodb-kubernetes/pkg/util/architectures" + "golang.org/x/xerrors" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func debugMongos(ctx context.Context, mdb *mdbv1.MongoDB, centralClusterName string, reconcilerHelper *operator.ShardedClusterReconcileHelper, memberCluster multicluster.MemberCluster, deployPods bool, diffwatchImage string) ([]attachCommand, error) { + var attachCommands []attachCommand + for podIdx := 0; podIdx < reconcilerHelper.GetMongosScaler(memberCluster).DesiredReplicas(); podIdx++ { + stsName := reconcilerHelper.GetMongosStsName(memberCluster) + templateData := mongosTemplateData(mdb, memberCluster, stsName, podIdx) + scriptsHash, err := renderTemplatesAndCreateConfigMap(ctx, memberCluster, templateData, podConfigMapName(podName(stsName, podIdx)), "mongos_entrypoint.sh.tpl", "mongos_tmux_session.yaml.tpl") + if err != nil { + return nil, xerrors.Errorf("error creating mongos config map in cluster %s: %w", memberCluster.Name, err) + } + + sts := createMongosStatefulSetObject(mdb.Namespace, scriptsHash, templateData, deployPods, diffwatchImage) + if err = createStatefulSet(ctx, sts, memberCluster.Client); err != nil { + return nil, xerrors.Errorf("error creating mongos statefulset in cluster %s: %w", memberCluster.Name, err) + } + + attachCommands = append(attachCommands, newAttachCommand(templateData, centralClusterName, memberCluster.Name)) + } + + return attachCommands, nil +} + +func debugConfigServers(ctx context.Context, mdb *mdbv1.MongoDB, centralClusterName string, reconcilerHelper *operator.ShardedClusterReconcileHelper, memberCluster multicluster.MemberCluster, deployPods bool, diffwatchImage string) ([]attachCommand, error) { + var attachCommands []attachCommand + for podIdx := 0; podIdx < reconcilerHelper.GetConfigSrvScaler(memberCluster).DesiredReplicas(); podIdx++ { + stsName := reconcilerHelper.GetConfigSrvStsName(memberCluster) + templateData := configServerTemplateData(mdb, memberCluster, stsName, podIdx) + scriptsHash, err := renderTemplatesAndCreateConfigMap(ctx, memberCluster, templateData, podConfigMapName(podName(stsName, podIdx)), "replicaset_entrypoint.sh.tpl", "replicaset_tmux_session.yaml.tpl") + if err != nil { + return nil, xerrors.Errorf("error creating mongos config map in cluster %s: %w", memberCluster.Name, err) + } + + sts := createReplicaSetStatefulSetObject(mdb.Namespace, scriptsHash, templateData, deployPods, diffwatchImage) + if err = createStatefulSet(ctx, sts, memberCluster.Client); err != nil { + 
return nil, xerrors.Errorf("error creating config server statefulset in cluster %s: %w", memberCluster.Name, err) + } + + attachCommands = append(attachCommands, newAttachCommand(templateData, centralClusterName, memberCluster.Name)) + } + + return attachCommands, nil +} + +func debugShardsServers(ctx context.Context, mdb *mdbv1.MongoDB, centralClusterName string, reconcilerHelper *operator.ShardedClusterReconcileHelper, shardIdx int, memberCluster multicluster.MemberCluster, deployPods bool, diffwatchImage string) ([]attachCommand, error) { + var attachCommands []attachCommand + for podIdx := 0; podIdx < reconcilerHelper.GetShardScaler(shardIdx, memberCluster).DesiredReplicas(); podIdx++ { + stsName := reconcilerHelper.GetShardStsName(shardIdx, memberCluster) + templateData := shardTemplateData(mdb, memberCluster, stsName, shardIdx, podIdx) + scriptsHash, err := renderTemplatesAndCreateConfigMap(ctx, memberCluster, templateData, podConfigMapName(podName(stsName, podIdx)), "replicaset_entrypoint.sh.tpl", "replicaset_tmux_session.yaml.tpl") + if err != nil { + return nil, xerrors.Errorf("error creating mongos config map in cluster %s: %w", memberCluster.Name, err) + } + + sts := createReplicaSetStatefulSetObject(mdb.Namespace, scriptsHash, templateData, deployPods, diffwatchImage) + if err = createStatefulSet(ctx, sts, memberCluster.Client); err != nil { + return nil, xerrors.Errorf("error creating config server statefulset in cluster %s: %w", memberCluster.Name, err) + } + + attachCommands = append(attachCommands, newAttachCommand(templateData, centralClusterName, memberCluster.Name)) + } + + return attachCommands, nil +} + +func debugReplicaSetPods(ctx context.Context, resourceNamespace string, resourceName string, mdb *mdbv1.DbCommonSpec, mdbAnnotations map[string]string, centralClusterName string, memberCluster multicluster.MemberCluster, deployPods bool, diffwatchImage string) ([]attachCommand, error) { + var attachCommands []attachCommand + for podIdx := 0; podIdx < memberCluster.Replicas; podIdx++ { + templateData := replicaSetTemplateData(resourceNamespace, resourceName, mdb, mdbAnnotations, memberCluster, podIdx) + stsName := replicaSetStatefulSetName(resourceName, memberCluster) + scriptsHash, err := renderTemplatesAndCreateConfigMap(ctx, memberCluster, templateData, podConfigMapName(podName(stsName, podIdx)), "replicaset_entrypoint.sh.tpl", "replicaset_tmux_session.yaml.tpl") + if err != nil { + return nil, xerrors.Errorf("error creating mongos config map in cluster %s: %w", memberCluster.Name, err) + } + + sts := createReplicaSetStatefulSetObject(resourceNamespace, scriptsHash, templateData, deployPods, diffwatchImage) + if err = createStatefulSet(ctx, sts, memberCluster.Client); err != nil { + return nil, xerrors.Errorf("error creating config server statefulset in cluster %s: %w", memberCluster.Name, err) + } + + attachCommands = append(attachCommands, newAttachCommand(templateData, centralClusterName, memberCluster.Name)) + } + + return attachCommands, nil +} + +var mdbDebugLabels = map[string]string{ + "mdb-debug": "true", +} + +func createReplicaSetStatefulSetObject(namespace string, scriptsHash string, templateData TemplateData, deployPods bool, diffwatchImage string) appsv1.StatefulSet { + deploymentName := fmt.Sprintf("mdb-debug-%s", templateData.PodName) + + command := ` +set -x +cp /scripts/entrypoint.sh ./entrypoint.sh +chmod +x ./entrypoint.sh +cat entrypoint.sh +./entrypoint.sh +` + return appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: deploymentName, + 
Namespace: namespace, + Labels: mdbDebugLabels, + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: replicas(deployPods), + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": deploymentName, + }}, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": deploymentName, + }, + Annotations: map[string]string{ + "scripts-hash": scriptsHash, + }, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: "mdb-debug-sa-cluster-admin", + // Affinity rules are not necessary on Kind + // but in cloud (i.e. GKE) we need to co-locate debug pods with appdb pods + // on the same node to allow for multiple mounts to the same PV. + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "statefulset.kubernetes.io/pod-name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{templateData.PodName}, + }, + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "data", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: fmt.Sprintf("data-%s", templateData.PodName), + }, + }, + }, + { + Name: "scripts", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{Name: fmt.Sprintf("mdb-debug-scripts-%s", templateData.PodName)}, + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "mdb-debug", + Image: diffwatchImage, + ImagePullPolicy: corev1.PullAlways, + TTY: true, + Command: []string{"/bin/bash", "-c", command}, + VolumeMounts: []corev1.VolumeMount{ + { + MountPath: "/data", + Name: "data", + }, + { + MountPath: "/scripts", + Name: "scripts", + }, + }, + }, + }, + }, + }, + }, + } +} + +func createMongosStatefulSetObject(namespace string, scriptsHash string, templateData TemplateData, deployPods bool, diffwatchImage string) appsv1.StatefulSet { + stsName := fmt.Sprintf("mdb-debug-%s", templateData.PodName) + + command := ` +set -x +cp /scripts/entrypoint.sh ./entrypoint.sh +chmod +x ./entrypoint.sh +cat entrypoint.sh +./entrypoint.sh +` + return appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: stsName, + Namespace: namespace, + Labels: mdbDebugLabels, + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: replicas(deployPods), + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": stsName, + }}, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": stsName, + }, + Annotations: map[string]string{ + "scripts-hash": scriptsHash, + }, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: "mdb-debug-sa-cluster-admin", + Volumes: []corev1.Volume{ + { + Name: "data", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }, + { + Name: "scripts", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{Name: fmt.Sprintf("mdb-debug-scripts-%s", templateData.PodName)}, + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "mdb-debug", + Image: diffwatchImage, + ImagePullPolicy: corev1.PullAlways, + TTY: true, + Command: []string{"/bin/bash", "-c", command}, + VolumeMounts: []corev1.VolumeMount{ + { + MountPath: "/data", + Name: "data", + }, + { + MountPath: 
"/scripts", + Name: "scripts", + }, + }, + }, + }, + }, + }, + }, + } +} + +func renderTemplatesAndCreateConfigMap(ctx context.Context, memberCluster multicluster.MemberCluster, templateData TemplateData, configMapName string, entrypointTemplateName string, tmuxSessionTemplateName string) (string, error) { + entrypoint, err := renderTemplate(entrypointTemplateName, templateData) + if err != nil { + return "", xerrors.Errorf("failed to render %s: %w", entrypointTemplateName, err) + } + + tmuxSession, err := renderTemplate(tmuxSessionTemplateName, templateData) + if err != nil { + return "", xerrors.Errorf("failed to render %s: %w", tmuxSessionTemplateName, err) + } + + return createConfigMap(ctx, templateData.Namespace, memberCluster.Client, configMapName, entrypoint, tmuxSession) +} + +func mongosTemplateData(mdb *mdbv1.MongoDB, memberCluster multicluster.MemberCluster, stsName string, podIdx int) TemplateData { + return TemplateData{ + Namespace: mdb.Namespace, + ResourceName: mdb.Name, + ResourceType: "mdb", + StsName: stsName, + PodName: fmt.Sprintf("%s-%d", stsName, podIdx), + PodIdx: podIdx, + ClusterIdx: memberCluster.Index, + ShortName: fmt.Sprintf("mongos-%d-%d", memberCluster.Index, podIdx), + StaticArch: architectures.IsRunningStaticArchitecture(mdb.Annotations), + PodFQDN: getPodFQDN(mdb.Namespace, mdb.Name+"-mongos", &mdb.Spec.DbCommonSpec, memberCluster, podIdx), + ContainerName: containerName(architectures.IsRunningStaticArchitecture(mdb.Annotations)), + } +} + +func configServerTemplateData(mdb *mdbv1.MongoDB, memberCluster multicluster.MemberCluster, stsName string, podIdx int) TemplateData { + return TemplateData{ + Namespace: mdb.Namespace, + ResourceName: mdb.Name, + ResourceType: "mdb", + StsName: stsName, + PodName: fmt.Sprintf("%s-%d", stsName, podIdx), + PodIdx: podIdx, + ClusterIdx: memberCluster.Index, + ShortName: fmt.Sprintf("cs-%d-%d", memberCluster.Index, podIdx), + PodFQDN: getPodFQDN(mdb.Namespace, mdb.Name+"-config", &mdb.Spec.DbCommonSpec, memberCluster, podIdx), + StaticArch: architectures.IsRunningStaticArchitecture(mdb.Annotations), + ContainerName: containerName(architectures.IsRunningStaticArchitecture(mdb.Annotations)), + } +} + +func replicaSetTemplateData(resourceNamespace string, resourceName string, mdb *mdbv1.DbCommonSpec, mdbAnnotations map[string]string, memberCluster multicluster.MemberCluster, podIdx int) TemplateData { + return TemplateData{ + Namespace: resourceNamespace, + ResourceName: resourceName, + ResourceType: "mdb", + StsName: replicaSetStatefulSetName(resourceName, memberCluster), + PodName: fmt.Sprintf("%s-%d", replicaSetStatefulSetName(resourceName, memberCluster), podIdx), + PodIdx: podIdx, + ClusterIdx: memberCluster.Index, + TLSEnabled: mdb.IsSecurityTLSConfigEnabled(), + ShortName: fmt.Sprintf("rs-%d-%d", memberCluster.Index, podIdx), + PodFQDN: getPodFQDN(resourceNamespace, resourceName, mdb, memberCluster, podIdx), + StaticArch: architectures.IsRunningStaticArchitecture(mdbAnnotations), + ContainerName: containerName(architectures.IsRunningStaticArchitecture(mdbAnnotations)), + } +} + +func containerName(staticArch bool) string { + if staticArch { + return "mongodb-agent" + } else { + return "mongodb-enteprise-database" + } +} + +func getPodFQDN(resourceNamespace string, resourceName string, mdb *mdbv1.DbCommonSpec, memberCluster multicluster.MemberCluster, podIdx int) string { + if memberCluster.Legacy { + hostnames, _ := dns.GetDNSNames(resourceName, dns.GetServiceName(resourceName), resourceNamespace, "cluster.local", 
podIdx+1, mdb.GetExternalDomain()) + return hostnames[podIdx] + } else { + return dns.GetMultiClusterPodServiceFQDN(resourceName, resourceNamespace, memberCluster.Index, mdb.GetExternalDomain(), podIdx, "cluster.local") + } +} + +func shardTemplateData(mdb *mdbv1.MongoDB, memberCluster multicluster.MemberCluster, stsName string, shardIdx int, podIdx int) TemplateData { + return TemplateData{ + Namespace: mdb.Namespace, + ResourceName: mdb.Name, + ResourceType: "mdb", + StsName: stsName, + PodName: fmt.Sprintf("%s-%d", stsName, podIdx), + PodIdx: podIdx, + ClusterIdx: memberCluster.Index, + ShortName: fmt.Sprintf("sh-%d-%d-%d", memberCluster.Index, shardIdx, podIdx), + PodFQDN: getPodFQDN(mdb.Namespace, fmt.Sprintf("%s-%d", mdb.Name, shardIdx), &mdb.Spec.DbCommonSpec, memberCluster, podIdx), + StaticArch: architectures.IsRunningStaticArchitecture(mdb.Annotations), + ContainerName: containerName(architectures.IsRunningStaticArchitecture(mdb.Annotations)), + } +} + +func replicaSetStatefulSetName(resourceName string, memberCluster multicluster.MemberCluster) string { + if memberCluster.Legacy { + return resourceName + } + + return fmt.Sprintf("%s-%d", resourceName, memberCluster.Index) +} + +func podName(statefulSetName string, podIdx int) string { + return fmt.Sprintf("%s-%d", statefulSetName, podIdx) +} +func podConfigMapName(podName string) string { + return fmt.Sprintf("mdb-debug-scripts-%s", podName) +} diff --git a/tools/mdbdebug/cmd/mdbdebug/mongodb_community.go b/tools/mdbdebug/cmd/mdbdebug/mongodb_community.go new file mode 100644 index 000000000..a04fe2bdf --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/mongodb_community.go @@ -0,0 +1,203 @@ +package main + +import ( + "context" + "fmt" + mdbcv1 "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/api/v1" + kubernetesClient "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/client" + "golang.org/x/xerrors" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func mongoDBCommunityTemplateData(mdbc *mdbcv1.MongoDBCommunity, podIdx int) TemplateData { + return TemplateData{ + Namespace: mdbc.Namespace, + ResourceName: mdbc.Name, + ResourceType: "mdbc", + StsName: mdbc.Name, + PodName: fmt.Sprintf("%s-%d", mdbc.Name, podIdx), + PodIdx: podIdx, + ClusterIdx: 0, + ShortName: fmt.Sprintf("%s-%d", mdbc.Name, podIdx), + StaticArch: true, + ContainerName: "mongodb-agent", + MongoDBCommunity: true, + VolumeName: fmt.Sprintf("data-volume-%s-%d", mdbc.Name, podIdx), + BaseLogDir: "/logs", + } +} + +func mongoDBCommunityConfigMapName(mdbc *mdbcv1.MongoDBCommunity, podIdx int) string { + return fmt.Sprintf("mdb-debug-scripts-%s-%d", mdbc.Name, podIdx) +} + +func createMongoDBCommunityConfigMap(ctx context.Context, mdbc *mdbcv1.MongoDBCommunity, client client.Client, podIdx int) (string, error) { + templateData := mongoDBCommunityTemplateData(mdbc, podIdx) + entryPoint, err := renderTemplate("appdb_entrypoint.sh.tpl", templateData) + if err != nil { + return "", xerrors.Errorf("failed to render appdb_entrypoint.sh.tpl: %w", err) + } + + tmuxSession, err := renderTemplate("appdb_tmux_session.yaml.tpl", templateData) + if err != nil { + return "", xerrors.Errorf("failed to render appdb_tmux_session.yaml.tpl: %w", err) + } + + return createConfigMap(ctx, mdbc.Namespace, client, mongoDBCommunityConfigMapName(mdbc, podIdx), entryPoint, tmuxSession) +} + 
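// Example usage sketch for the MongoDBCommunity flow below (the namespace "mongodb",
+// resource name "my-mdbc" and context "kind-kind" are illustrative placeholders, and
+// operatorClient stands for any controller-runtime client for that cluster):
+//
+//	cmds, err := debugMongoDBCommunity(ctx, "mongodb", "my-mdbc", "kind-kind",
+//		kubernetesClient.NewClient(operatorClient), false, "quay.io/lsierant/diffwatch:latest")
+//
+// It renders per-pod scripts into a config map and creates one debug statefulset per member pod.
+// With deployPods=false the statefulsets are created scaled to zero; attach.sh later scales the
+// selected one to 1 replica and runs `tmux attach` in it.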
+func debugMongoDBCommunity(ctx context.Context, namespace string, name string, centralClusterName string, client kubernetesClient.Client, deployPods bool, diffwatchImage string) ([]attachCommand, error) {
+	mdbc := &mdbcv1.MongoDBCommunity{}
+	if err := client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, mdbc); err != nil {
+		return nil, xerrors.Errorf("error getting resource %s/%s: %w", namespace, name, err)
+	}
+
+	if err := createServiceAccountAndRoles(ctx, client, namespace); err != nil {
+		return nil, xerrors.Errorf("failed to create service account and roles in cluster %s: %w", centralClusterName, err)
+	}
+
+	var attachCommands []attachCommand
+	for podIdx := 0; podIdx < mdbc.Spec.Members; podIdx++ {
+		templateData := mongoDBCommunityTemplateData(mdbc, podIdx)
+		scriptsHash, err := createMongoDBCommunityConfigMap(ctx, mdbc, client, podIdx)
+		if err != nil {
+			return nil, xerrors.Errorf("error creating debug scripts config map in cluster %s: %w", centralClusterName, err)
+		}
+
+		sts := createMCOStatefulSetObject(mdbc.Namespace, scriptsHash, templateData, deployPods, diffwatchImage)
+		if err = createStatefulSet(ctx, sts, client); err != nil {
+			return nil, xerrors.Errorf("error creating statefulset in cluster %s: %w", centralClusterName, err)
+		}
+
+		attachCommands = append(attachCommands, newAttachCommand(templateData, centralClusterName, centralClusterName))
+	}
+
+	return attachCommands, nil
+}
+
+func createMCOStatefulSetObject(namespace string, scriptsHash string, templateData TemplateData, deployPods bool, diffwatchImage string) appsv1.StatefulSet {
+	deploymentName := fmt.Sprintf("mdb-debug-%s", templateData.PodName)
+
+	command := `
+set -x
+cp /scripts/entrypoint.sh ./entrypoint.sh
+chmod +x ./entrypoint.sh
+cat entrypoint.sh
+./entrypoint.sh
+`
+	return appsv1.StatefulSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      deploymentName,
+			Namespace: namespace,
+			Labels:    mdbDebugLabels,
+		},
+		Spec: appsv1.StatefulSetSpec{
+			Replicas: replicas(deployPods),
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{
+					"app": deploymentName,
+				}},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{
+						"app": deploymentName,
+					},
+					Annotations: map[string]string{
+						"scripts-hash": scriptsHash,
+					},
+				},
+				Spec: corev1.PodSpec{
+					ServiceAccountName: "mdb-debug-sa-cluster-admin",
+					// Affinity rules are not necessary on Kind,
+					// but in cloud (e.g. GKE) we need to co-locate debug pods with the database pods
+					// on the same node to allow for multiple mounts to the same PV.
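+					// Required pod affinity on the "statefulset.kubernetes.io/pod-name" label
+					// (set by the StatefulSet controller to the exact pod name) pins this debug
+					// pod to the node running the target pod, so a ReadWriteOnce volume can be
+					// mounted by both pods at once.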
+ Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "statefulset.kubernetes.io/pod-name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{templateData.PodName}, + }, + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "data", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: templateData.VolumeName, + }, + }, + }, + { + Name: "logs", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: fmt.Sprintf("logs-volume-%s", templateData.PodName), + }, + }, + }, + { + Name: "automation-config", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + DefaultMode: pointer.Int32(416), + SecretName: fmt.Sprintf("%s-config", templateData.ResourceName), + }, + }, + }, + { + Name: "scripts", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{Name: fmt.Sprintf("mdb-debug-scripts-%s", templateData.PodName)}, + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "mdb-debug", + Image: diffwatchImage, + ImagePullPolicy: corev1.PullAlways, + TTY: true, + Command: []string{"/bin/bash", "-c", command}, + VolumeMounts: []corev1.VolumeMount{ + { + MountPath: "/data", + Name: "data", + }, + { + MountPath: "/logs", + Name: "logs", + }, + { + MountPath: "/scripts", + Name: "scripts", + }, + { + MountPath: "/data/ac", + Name: "automation-config", + }, + }, + }, + }, + }, + }, + }, + } +} diff --git a/tools/mdbdebug/cmd/mdbdebug/mongodb_search.go b/tools/mdbdebug/cmd/mdbdebug/mongodb_search.go new file mode 100644 index 000000000..3399dfeee --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/mongodb_search.go @@ -0,0 +1,182 @@ +package main + +import ( + "context" + "fmt" + "github.com/mongodb/mongodb-kubernetes/api/v1/search" + kubernetesClient "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/client" + "golang.org/x/xerrors" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func mongoDBSearchTemplateData(mdbs *search.MongoDBSearch, podIdx int) TemplateData { + podName := fmt.Sprintf("%s-search-%d", mdbs.Name, podIdx) + return TemplateData{ + Namespace: mdbs.Namespace, + ResourceName: mdbs.Name, + ResourceType: "mdbs", + StsName: fmt.Sprintf("%s-search", mdbs.Name), + PodName: podName, + PodIdx: podIdx, + ClusterIdx: 0, + ShortName: podName, + StaticArch: true, + ContainerName: "mongot", + VolumeName: fmt.Sprintf("data-%s-search-%d", mdbs.Name, podIdx), + BaseLogDir: "/logs", + } +} + +func mongoDBSearchConfigMapName(mdbs *search.MongoDBSearch, podIdx int) string { + return fmt.Sprintf("mdb-debug-scripts-mdbs-%s-search-%d", mdbs.Name, podIdx) +} + +func createMongoDBSearchConfigMap(ctx context.Context, mdbc *search.MongoDBSearch, client client.Client, podIdx int) (string, error) { + templateData := mongoDBSearchTemplateData(mdbc, podIdx) + entryPoint, err := renderTemplate("mongot_entrypoint.sh.tpl", templateData) + if err != nil { + return "", xerrors.Errorf("failed to render mongot_entrypoint.sh.tpl: %w", err) + } + + tmuxSession, err := 
renderTemplate("mongot_tmux_session.yaml.tpl", templateData) + if err != nil { + return "", xerrors.Errorf("failed to render mongot_tmux_session.yaml.tpl: %w", err) + } + + return createConfigMap(ctx, mdbc.Namespace, client, mongoDBSearchConfigMapName(mdbc, podIdx), entryPoint, tmuxSession) +} + +func debugMongoDBSearch(ctx context.Context, namespace string, name string, centralClusterName string, client kubernetesClient.Client, deployPods bool, diffwatchImage string) ([]attachCommand, error) { + mdbc := &search.MongoDBSearch{} + if err := client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, mdbc); err != nil { + return nil, xerrors.Errorf("error getting resource %s/%s", namespace, name) + } + + if err := createServiceAccountAndRoles(ctx, client, namespace); err != nil { + return nil, xerrors.Errorf("failed to create service account and roles in cluster %s: %w", centralClusterName, err) + } + + var attachCommands []attachCommand + for podIdx := 0; podIdx < 1; podIdx++ { + templateData := mongoDBSearchTemplateData(mdbc, podIdx) + scriptsHash, err := createMongoDBSearchConfigMap(ctx, mdbc, client, podIdx) + if err != nil { + return nil, xerrors.Errorf("error creating appdb config map in cluster %s: %w", centralClusterName, err) + } + + sts := createSearchStatefulSetObject(mdbc.Namespace, scriptsHash, templateData, deployPods, diffwatchImage) + if err = createStatefulSet(ctx, sts, client); err != nil { + return nil, xerrors.Errorf("error creating statefulset in cluster %s: %w", centralClusterName, err) + } + + attachCommands = append(attachCommands, newAttachCommand(templateData, centralClusterName, centralClusterName)) + } + + return attachCommands, nil +} + +func createSearchStatefulSetObject(namespace string, scriptsHash string, templateData TemplateData, deployPods bool, diffwatchImage string) appsv1.StatefulSet { + deploymentName := fmt.Sprintf("mdb-debug-%s", templateData.PodName) + + command := ` +set -x +cp /scripts/entrypoint.sh ./entrypoint.sh +chmod +x ./entrypoint.sh +cat entrypoint.sh +./entrypoint.sh +` + return appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: deploymentName, + Namespace: namespace, + Labels: mdbDebugLabels, + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: replicas(deployPods), + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": deploymentName, + }}, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": deploymentName, + }, + Annotations: map[string]string{ + "scripts-hash": scriptsHash, + }, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: "mdb-debug-sa-cluster-admin", + // Affinity rules are not necessary on Kind + // but in cloud (i.e. GKE) we need to co-locate debug pods with appdb pods + // on the same node to allow for multiple mounts to the same PV. 
+ Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "statefulset.kubernetes.io/pod-name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{templateData.PodName}, + }, + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "data", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: templateData.VolumeName, + }, + }, + }, + { + Name: "scripts", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{Name: fmt.Sprintf("mdb-debug-scripts-%s-%s", templateData.ResourceType, templateData.PodName)}, + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "mdb-debug", + Image: diffwatchImage, + ImagePullPolicy: corev1.PullAlways, + TTY: true, + Command: []string{"/bin/bash", "-c", command}, + VolumeMounts: []corev1.VolumeMount{ + { + MountPath: "/data", + Name: "data", + }, + { + MountPath: "/logs", + SubPath: "mdb-debug", + Name: "data", + }, + { + MountPath: "/scripts", + Name: "scripts", + }, + }, + }, + }, + }, + }, + }, + } +} diff --git a/tools/mdbdebug/cmd/mdbdebug/ops_manager.go b/tools/mdbdebug/cmd/mdbdebug/ops_manager.go new file mode 100644 index 000000000..ade0f47f3 --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/ops_manager.go @@ -0,0 +1,458 @@ +package main + +import ( + "context" + "crypto/sha1" + "encoding/base64" + "fmt" + "github.com/mongodb/mongodb-kubernetes/controllers/operator" + kubernetesClient "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/client" + "github.com/mongodb/mongodb-kubernetes/pkg/util/architectures" + "golang.org/x/xerrors" + "io" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + omv1 "github.com/mongodb/mongodb-kubernetes/api/v1/om" + "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/configmap" + "github.com/mongodb/mongodb-kubernetes/pkg/multicluster" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apiErrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/pointer" +) + +func debugAppDB(ctx context.Context, opsManager *omv1.MongoDBOpsManager, centralClusterName string, memberCluster multicluster.MemberCluster, deployPods bool, diffwatchImage string) ([]attachCommand, error) { + var attachCommands []attachCommand + for podIdx := 0; podIdx < opsManager.Spec.AppDB.GetMemberClusterSpecByName(memberCluster.Name).Members; podIdx++ { + templateData := appDBTemplateData(opsManager, memberCluster, podIdx) + scriptsHash, err := createAppDBConfigMap(ctx, opsManager, memberCluster, podIdx) + if err != nil { + return nil, xerrors.Errorf("error creating appdb config map in cluster %s: %w", memberCluster.Name, err) + } + + sts := createAppDBStatefulSetObject(opsManager.Namespace, scriptsHash, templateData, deployPods, diffwatchImage) + if err = createStatefulSet(ctx, sts, memberCluster.Client); err != nil { + return nil, xerrors.Errorf("error creating statefulset in cluster %s: %w", memberCluster.Name, err) + } + + attachCommands = append(attachCommands, newAttachCommand(templateData, centralClusterName, memberCluster.Name)) + } + + return attachCommands, nil +} + +func debugOM(ctx 
context.Context, opsManager *omv1.MongoDBOpsManager, reconcilerHelper operator.OpsManagerReconcilerHelper, centralClusterName string, memberCluster multicluster.MemberCluster, deployPods bool, diffwatchImage string) ([]attachCommand, error) {
+	var attachCommands []attachCommand
+	for podIdx := 0; podIdx < memberCluster.Replicas; podIdx++ {
+		templateData := omTemplateData(opsManager, reconcilerHelper, memberCluster, podIdx)
+		scriptsHash, err := createOMConfigMap(ctx, opsManager, reconcilerHelper, memberCluster, podIdx)
+		if err != nil {
+			return nil, xerrors.Errorf("error creating ops manager config map in cluster %s: %w", memberCluster.Name, err)
+		}
+
+		sts := createOMDeploymentObject(opsManager.Namespace, scriptsHash, templateData, deployPods, diffwatchImage)
+		if err = createStatefulSet(ctx, sts, memberCluster.Client); err != nil {
+			return nil, xerrors.Errorf("error creating statefulset in cluster %s: %w", memberCluster.Name, err)
+		}
+
+		attachCommands = append(attachCommands, newAttachCommand(templateData, centralClusterName, memberCluster.Name))
+	}
+
+	if opsManager.Spec.Backup != nil && opsManager.Spec.Backup.Enabled {
+		for podIdx := 0; podIdx < reconcilerHelper.BackupDaemonMembersForMemberCluster(memberCluster); podIdx++ {
+			templateData := omBackupDaemonTemplateData(opsManager, reconcilerHelper, memberCluster, podIdx)
+			scriptsHash, err := createOMBackupDaemonConfigMap(ctx, opsManager, reconcilerHelper, memberCluster, podIdx)
+			if err != nil {
+				return nil, xerrors.Errorf("error creating backup daemon config map in cluster %s: %w", memberCluster.Name, err)
+			}
+
+			sts := createOMDeploymentObject(opsManager.Namespace, scriptsHash, templateData, deployPods, diffwatchImage)
+			if err = createStatefulSet(ctx, sts, memberCluster.Client); err != nil {
+				return nil, xerrors.Errorf("error creating statefulset in cluster %s: %w", memberCluster.Name, err)
+			}
+			attachCommands = append(attachCommands, newAttachCommand(templateData, centralClusterName, memberCluster.Name))
+		}
+	}
+
+	return attachCommands, nil
+}
+
+func newAttachCommand(templateData TemplateData, centralClusterName string, memberClusterName string) attachCommand {
+	debugPodName := fmt.Sprintf("mdb-debug-%s-0", templateData.PodName)
+	debugStsName := fmt.Sprintf("mdb-debug-%s", templateData.PodName)
+	if memberClusterName == multicluster.LegacyCentralClusterName {
+		memberClusterName = centralClusterName
+	}
+	attachCommand := attachCommand{
+		Command:         createKubectlAttachCommand(centralClusterName, memberClusterName, templateData.Namespace, templateData.PodName, debugPodName),
+		ShortName:       templateData.ShortName,
+		PodName:         templateData.PodName,
+		DebugPodName:    debugPodName,
+		DebugStsName:    debugStsName,
+		ResourceType:    templateData.ResourceType,
+		ResourceName:    templateData.ResourceName,
+		OperatorContext: centralClusterName,
+		DebugPodContext: memberClusterName,
+		Namespace:       templateData.Namespace,
+	}
+	return attachCommand
+}
+
+func createStatefulSet(ctx context.Context, sts appsv1.StatefulSet, client kubernetesClient.Client) error {
+	stsExists := true
+	namespacedName := types.NamespacedName{
+		Namespace: sts.Namespace,
+		Name:      sts.Name,
+	}
+	existingSts := appsv1.StatefulSet{}
+	err := client.Get(ctx, namespacedName, &existingSts)
+	if err != nil {
+		if apiErrors.IsNotFound(err) {
+			stsExists = false
+		} else {
+			return xerrors.Errorf("failed to get statefulset: %v: %w", namespacedName, err)
+		}
+	}
+
+	if stsExists {
+		// keep the current scale on update so a manually scaled debug sts isn't reset
+		sts.Spec.Replicas = existingSts.Spec.Replicas
+		if err := client.Update(ctx, &sts); err != nil {
+			return 
xerrors.Errorf("failed to update statefulset: %v: %w", namespacedName, err) + } + } else { + if err := client.Create(ctx, &sts); err != nil { + return xerrors.Errorf("failed to create statefulset: %v: %w", namespacedName, err) + } + } + + return nil +} + +func createAppDBStatefulSetObject(namespace string, scriptsHash string, templateData TemplateData, deployPods bool, diffwatchImage string) appsv1.StatefulSet { + deploymentName := fmt.Sprintf("mdb-debug-%s", templateData.PodName) + + command := ` +set -x +cp /scripts/entrypoint.sh ./entrypoint.sh +chmod +x ./entrypoint.sh +cat entrypoint.sh +./entrypoint.sh +` + return appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: deploymentName, + Namespace: namespace, + Labels: mdbDebugLabels, + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: replicas(deployPods), + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": deploymentName, + }}, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": deploymentName, + }, + Annotations: map[string]string{ + "scripts-hash": scriptsHash, + }, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: "mdb-debug-sa-cluster-admin", + // Affinity rules are not necessary on Kind + // but in cloud (i.e. GKE) we need to co-locate debug pods with appdb pods + // on the same node to allow for multiple mounts to the same PV. + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "statefulset.kubernetes.io/pod-name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{templateData.PodName}, + }, + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "data", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: templateData.VolumeName, + }, + }, + }, + { + Name: "automation-config", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + DefaultMode: pointer.Int32(416), + SecretName: fmt.Sprintf("%s-config", templateData.ResourceName), + }, + }, + }, + { + Name: "scripts", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{Name: fmt.Sprintf("mdb-debug-scripts-%s", templateData.PodName)}, + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "mdb-debug", + Image: diffwatchImage, + ImagePullPolicy: corev1.PullAlways, + TTY: true, + Command: []string{"/bin/bash", "-c", command}, + VolumeMounts: []corev1.VolumeMount{ + { + MountPath: "/data", + Name: "data", + }, + { + MountPath: "/scripts", + Name: "scripts", + }, + { + MountPath: "/data/ac", + Name: "automation-config", + }, + }, + }, + }, + }, + }, + }, + } +} + +func replicas(deployPods bool) *int32 { + if deployPods { + return ptr.To(int32(1)) + } + return ptr.To(int32(0)) +} + +func createOMDeploymentObject(namespace string, scriptsHash string, templateData TemplateData, deployPods bool, diffwatchImage string) appsv1.StatefulSet { + deploymentName := fmt.Sprintf("mdb-debug-%s", templateData.PodName) + + command := ` +set -x +cp /scripts/entrypoint.sh ./entrypoint.sh +chmod +x ./entrypoint.sh +cat entrypoint.sh +./entrypoint.sh +` + + return appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: deploymentName, + Namespace: namespace, + Labels: 
mdbDebugLabels,
+		},
+		Spec: appsv1.StatefulSetSpec{
+			Replicas: replicas(deployPods),
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{
+					"app": deploymentName,
+				}},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{
+						"app": deploymentName,
+					},
+					Annotations: map[string]string{
+						"scripts-hash": scriptsHash,
+					},
+				},
+				Spec: corev1.PodSpec{
+					ServiceAccountName: "mdb-debug-sa-cluster-admin",
+					Volumes: []corev1.Volume{
+						{
+							Name: "data",
+							VolumeSource: corev1.VolumeSource{
+								// the OM debug pod only needs a scratch workspace, not the Ops Manager data PVC
+								EmptyDir: &corev1.EmptyDirVolumeSource{},
+							},
+						},
+						{
+							Name: "scripts",
+							VolumeSource: corev1.VolumeSource{
+								ConfigMap: &corev1.ConfigMapVolumeSource{
+									LocalObjectReference: corev1.LocalObjectReference{Name: fmt.Sprintf("mdb-debug-scripts-%s", templateData.PodName)},
+								},
+							},
+						},
+					},
+					Containers: []corev1.Container{
+						{
+							Name:            "mdb-debug",
+							Image:           diffwatchImage,
+							ImagePullPolicy: corev1.PullAlways,
+							TTY:             true,
+							Command:         []string{"/bin/bash", "-c", command},
+							VolumeMounts: []corev1.VolumeMount{
+								{
+									MountPath: "/data",
+									Name:      "data",
+								},
+								{
+									MountPath: "/scripts",
+									Name:      "scripts",
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
+func createAppDBConfigMap(ctx context.Context, opsManager *omv1.MongoDBOpsManager, memberCluster multicluster.MemberCluster, podIdx int) (string, error) {
+	templateData := appDBTemplateData(opsManager, memberCluster, podIdx)
+	appDBEntryPoint, err := renderTemplate("appdb_entrypoint.sh.tpl", templateData)
+	if err != nil {
+		return "", xerrors.Errorf("failed to render appdb_entrypoint.sh.tpl: %w", err)
+	}
+
+	tmuxSession, err := renderTemplate("appdb_tmux_session.yaml.tpl", templateData)
+	if err != nil {
+		return "", xerrors.Errorf("failed to render appdb_tmux_session.yaml.tpl: %w", err)
+	}
+
+	return createConfigMap(ctx, opsManager.Namespace, memberCluster.Client, appDBConfigMapName(opsManager.Spec.AppDB, memberCluster, podIdx), appDBEntryPoint, tmuxSession)
+}
+
+func createOMConfigMap(ctx context.Context, opsManager *omv1.MongoDBOpsManager, reconcilerHelper operator.OpsManagerReconcilerHelper, memberCluster multicluster.MemberCluster, podIdx int) (string, error) {
+	templateData := omTemplateData(opsManager, reconcilerHelper, memberCluster, podIdx)
+	entryPoint, err := renderTemplate("om_entrypoint.sh.tpl", templateData)
+	if err != nil {
+		return "", xerrors.Errorf("failed to render om_entrypoint.sh.tpl: %w", err)
+	}
+
+	tmuxSession, err := renderTemplate("om_tmux_session.yaml.tpl", templateData)
+	if err != nil {
+		return "", xerrors.Errorf("failed to render om_tmux_session.yaml.tpl: %w", err)
+	}
+
+	return createConfigMap(ctx, opsManager.Namespace, memberCluster.Client, omConfigMapName(reconcilerHelper, memberCluster, podIdx), entryPoint, tmuxSession)
+}
+
+func createOMBackupDaemonConfigMap(ctx context.Context, opsManager *omv1.MongoDBOpsManager, reconcilerHelper operator.OpsManagerReconcilerHelper, memberCluster multicluster.MemberCluster, podIdx int) (string, error) {
+	templateData := omBackupDaemonTemplateData(opsManager, reconcilerHelper, memberCluster, podIdx)
+	entryPoint, err := renderTemplate("om_backup_daemon_entrypoint.sh.tpl", templateData)
+	if err != nil {
+		return "", xerrors.Errorf("failed to render om_backup_daemon_entrypoint.sh.tpl: %w", err)
+	}
+
+	tmuxSession, err := renderTemplate("om_backup_daemon_tmux_session.yaml.tpl", templateData)
+	if err != nil {
+		return "", xerrors.Errorf("failed to render om_backup_daemon_tmux_session.yaml.tpl: %w", err)
+	}
+
+	return createConfigMap(ctx, opsManager.Namespace, memberCluster.Client, omBackupDaemonConfigMapName(reconcilerHelper, memberCluster, podIdx), entryPoint, tmuxSession)
+}
+
+func createConfigMap(ctx context.Context, namespace string, client client.Client, configMapName string, entrypointScript string, tmuxSessionScript string) (string, error) {
+	hasher := sha1.New()
+	_, _ = io.WriteString(hasher, entrypointScript+tmuxSessionScript)
+	scriptsHash := base64.StdEncoding.EncodeToString(hasher.Sum(nil))
+
+	configMap := corev1.ConfigMap{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      configMapName,
+			Namespace: namespace,
+			Annotations: map[string]string{
+				"scripts-hash": scriptsHash,
+			},
+			Labels: mdbDebugLabels,
+		},
+		Data: map[string]string{
+			"entrypoint.sh": entrypointScript,
+			"session.yaml":  tmuxSessionScript,
+		},
+	}
+	if err := configmap.CreateOrUpdate(ctx, kubernetesClient.NewClient(client), configMap); err != nil {
+		return "", xerrors.Errorf("failed to update config map %s: %w", configMap.Name, err)
+	}
+
+	return scriptsHash, nil
+}
+
+func appDBTemplateData(opsManager *omv1.MongoDBOpsManager, memberCluster multicluster.MemberCluster, podIdx int) TemplateData {
+	return TemplateData{
+		Namespace:     opsManager.Namespace,
+		ResourceName:  opsManager.Spec.AppDB.Name(),
+		ResourceType:  "om",
+		StsName:       opsManager.Spec.AppDB.NameForCluster(memberCluster.Index),
+		PodName:       fmt.Sprintf("%s-%d", opsManager.Spec.AppDB.NameForCluster(memberCluster.Index), podIdx),
+		PodIdx:        podIdx,
+		ClusterIdx:    memberCluster.Index,
+		ShortName:     fmt.Sprintf("appdb-%d-%d", memberCluster.Index, podIdx),
+		StaticArch:    architectures.IsRunningStaticArchitecture(opsManager.Annotations),
+		ContainerName: "mongodb-agent",
+		VolumeName:    fmt.Sprintf("data-%s-%d", opsManager.Spec.AppDB.NameForCluster(memberCluster.Index), podIdx),
+		BaseLogDir:    "/data/logs",
+	}
+}
+
+func appDBConfigMapName(appDB omv1.AppDBSpec, memberCluster multicluster.MemberCluster, podIdx int) string {
+	return fmt.Sprintf("mdb-debug-scripts-%s-%d", appDB.NameForCluster(memberCluster.Index), podIdx)
+}
+
+func omTemplateData(opsManager *omv1.MongoDBOpsManager, reconcilerHelper operator.OpsManagerReconcilerHelper, memberCluster multicluster.MemberCluster, podIdx int) TemplateData {
+	return TemplateData{
+		Namespace:     opsManager.Namespace,
+		ResourceName:  opsManager.Name,
+		ResourceType:  "om",
+		StsName:       reconcilerHelper.OpsManagerStatefulSetNameForMemberCluster(memberCluster),
+		PodName:       fmt.Sprintf("%s-%d", reconcilerHelper.OpsManagerStatefulSetNameForMemberCluster(memberCluster), podIdx),
+		PodIdx:        podIdx,
+		ClusterIdx:    memberCluster.Index,
+		ShortName:     fmt.Sprintf("om-%d-%d", memberCluster.Index, podIdx),
+		StaticArch:    architectures.IsRunningStaticArchitecture(opsManager.Annotations),
+		ContainerName: "mongodb-ops-manager",
+	}
+}
+
+func omBackupDaemonTemplateData(opsManager *omv1.MongoDBOpsManager, reconcilerHelper operator.OpsManagerReconcilerHelper, memberCluster multicluster.MemberCluster, podIdx int) TemplateData {
+	return TemplateData{
+		Namespace:     opsManager.Namespace,
+		ResourceName:  opsManager.Name,
+		ResourceType:  "om",
+		StsName:       reconcilerHelper.BackupDaemonStatefulSetNameForMemberCluster(memberCluster),
+		PodName:       fmt.Sprintf("%s-%d", reconcilerHelper.BackupDaemonStatefulSetNameForMemberCluster(memberCluster), podIdx),
+		PodIdx:        podIdx,
+		ClusterIdx:    memberCluster.Index,
+		ShortName:     fmt.Sprintf("om-bd-%d-%d", memberCluster.Index, podIdx),
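+		// derived from the operator's static-architecture annotation, as in the
+		// other TemplateData builders above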
+ StaticArch: architectures.IsRunningStaticArchitecture(opsManager.Annotations), + ContainerName: "mongodb-ops-manager", + } +} + +func omConfigMapName(reconcilerHelper operator.OpsManagerReconcilerHelper, memberCluster multicluster.MemberCluster, podIdx int) string { + return fmt.Sprintf("mdb-debug-scripts-%s-%d", reconcilerHelper.OpsManagerStatefulSetNameForMemberCluster(memberCluster), podIdx) +} + +func omBackupDaemonConfigMapName(reconcilerHelper operator.OpsManagerReconcilerHelper, memberCluster multicluster.MemberCluster, podIdx int) string { + return fmt.Sprintf("mdb-debug-scripts-%s-%d", reconcilerHelper.BackupDaemonStatefulSetNameForMemberCluster(memberCluster), podIdx) +} diff --git a/tools/mdbdebug/cmd/mdbdebug/reconcilers.go b/tools/mdbdebug/cmd/mdbdebug/reconcilers.go new file mode 100644 index 000000000..90dcda873 --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/reconcilers.go @@ -0,0 +1,218 @@ +package main + +import ( + "context" + mdbv1 "github.com/mongodb/mongodb-kubernetes/api/v1/mdb" + "github.com/mongodb/mongodb-kubernetes/api/v1/mdbmulti" + omv1 "github.com/mongodb/mongodb-kubernetes/api/v1/om" + searchv1 "github.com/mongodb/mongodb-kubernetes/api/v1/search" + mdbcv1 "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/api/v1" + kubernetesClient "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/client" + "go.uber.org/zap" + "golang.org/x/xerrors" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "time" +) + +type mongoDBReconciler struct { + namespace string + operatorClusterName string + clusterMap map[string]client.Client + deploy bool + diffwatchImage string +} + +func newMongoDBReconciler(operatorClusterName string, namespace string, clusterMap map[string]client.Client, deployPods bool, diffwatchImage string) *mongoDBReconciler { + return &mongoDBReconciler{ + namespace: namespace, + operatorClusterName: operatorClusterName, + clusterMap: clusterMap, + deploy: deployPods, + diffwatchImage: diffwatchImage, + } +} + +func (r *mongoDBReconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { + logger := zap.S() + mdb := mdbv1.MongoDB{} + logger.Debugf("Received MongoDB reconcile event: %+v", request) + + operatorClient := r.clusterMap[r.operatorClusterName] + if err := operatorClient.Get(ctx, request.NamespacedName, &mdb); err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, xerrors.Errorf("error getting MongoDB resource %+v: %w", request.NamespacedName, err) + } + + logger.Debugf("Command line equivalent: mdbdebug --type mdb --context %s --namespace %s --name %s", r.operatorClusterName, request.Namespace, request.Name) + attachCommands, err := debugMongoDB(ctx, r.clusterMap, r.operatorClusterName, request.Namespace, request.Name, r.deploy, r.diffwatchImage) + if err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, xerrors.Errorf("error deploying debug for MongoDB resource %+v: %w", request.NamespacedName, err) + } + + if err = createOrUpdateAttachCommandsCM(ctx, logger, request.Namespace, request.Name, "mdb", attachCommands, operatorClient); err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, err + } + + return reconcile.Result{}, nil +} + +type mongoDBMultiClusterReconciler struct { + namespace string + operatorClusterName string + clusterMap map[string]client.Client + diffwatchImage string +} + +func newMongoDBMultiClusterReconciler(operatorClusterName string, namespace string, 
clusterMap map[string]client.Client, diffwatchImage string) *mongoDBMultiClusterReconciler { + return &mongoDBMultiClusterReconciler{ + namespace: namespace, + operatorClusterName: operatorClusterName, + clusterMap: clusterMap, + diffwatchImage: diffwatchImage, + } +} + +func (r *mongoDBMultiClusterReconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { + logger := zap.S() + mdb := mdbmulti.MongoDBMultiCluster{} + logger.Debugf("Received MongoDBMultiCluster reconcile event: %+v", request) + + operatorClient := r.clusterMap[r.operatorClusterName] + if err := operatorClient.Get(ctx, request.NamespacedName, &mdb); err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, xerrors.Errorf("error getting MongoDBMultiCluster resource %+v: %w", request.NamespacedName, err) + } + + attachCommands, err := debugMongoDB(ctx, r.clusterMap, r.operatorClusterName, request.Namespace, request.Name, true, r.diffwatchImage) + if err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, xerrors.Errorf("error deploying debug for MongoDBMultiCluster resource %+v: %w", request.NamespacedName, err) + } + + if err = createOrUpdateAttachCommandsCM(ctx, logger, request.Namespace, request.Name, "", attachCommands, operatorClient); err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, err + } + + return reconcile.Result{}, nil +} + +type opsManagerReconciler struct { + namespace string + operatorClusterName string + clusterMap map[string]client.Client + deploy bool + diffwatchImage string +} + +func newOpsManagerReconciler(operatorClusterName string, namespace string, clusterMap map[string]client.Client, deployPods bool, diffwatchImage string) *opsManagerReconciler { + return &opsManagerReconciler{ + namespace: namespace, + operatorClusterName: operatorClusterName, + clusterMap: clusterMap, + deploy: deployPods, + diffwatchImage: diffwatchImage, + } +} + +func (r *opsManagerReconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { + logger := zap.S() + om := omv1.MongoDBOpsManager{} + logger.Debugf("Received MongoDBOpsManager reconcile event: %+v", request) + + operatorClient := r.clusterMap[r.operatorClusterName] + if err := operatorClient.Get(ctx, request.NamespacedName, &om); err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, xerrors.Errorf("error getting MongoDBOpsManager resource %+v: %w", request.NamespacedName, err) + } + + logger.Debugf("Command line equivalent: mdbdebug --type om --context %s --namespace %s --name %s", r.operatorClusterName, request.Namespace, request.Name) + attachCommands, err := debugOpsManager(ctx, r.clusterMap, r.operatorClusterName, request.Namespace, request.Name, r.deploy, r.diffwatchImage) + if err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, xerrors.Errorf("error deploying debug for MongoDBOpsManager resource %+v: %w", request.NamespacedName, err) + } + + if err = createOrUpdateAttachCommandsCM(ctx, logger, request.Namespace, request.Name, "om", attachCommands, operatorClient); err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, err + } + + return reconcile.Result{}, nil +} + +type mongoDBCommunityReconciler struct { + namespace string + operatorClusterName string + client client.Client + deploy bool + diffwatchImage string +} + +func newMongoDBCommunityReconciler(operatorClusterName string, namespace string, client client.Client, deployPods bool, diffwatchImage string) 
*mongoDBCommunityReconciler { + return &mongoDBCommunityReconciler{ + namespace: namespace, + operatorClusterName: operatorClusterName, + client: client, + deploy: deployPods, + diffwatchImage: diffwatchImage, + } +} + +func (r *mongoDBCommunityReconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { + logger := zap.S() + mdb := mdbcv1.MongoDBCommunity{} + logger.Debugf("Received MongoDBCommunity reconcile event: %+v", request) + + operatorClient := r.client + if err := operatorClient.Get(ctx, request.NamespacedName, &mdb); err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, xerrors.Errorf("error getting MongoDB resource %+v: %w", request.NamespacedName, err) + } + + logger.Debugf("Command line equivalent: mdbdebug --type mdb --context %s --namespace %s --name %s", r.operatorClusterName, request.Namespace, request.Name) + attachCommands, err := debugMongoDBCommunity(ctx, request.Namespace, request.Name, r.operatorClusterName, kubernetesClient.NewClient(operatorClient), r.deploy, r.diffwatchImage) + if err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, xerrors.Errorf("error deploying debug for MongoDB resource %+v: %w", request.NamespacedName, err) + } + + if err = createOrUpdateAttachCommandsCM(ctx, logger, request.Namespace, request.Name, "mdbc", attachCommands, operatorClient); err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, err + } + + return reconcile.Result{}, nil +} + +type mongoDBSearchReconciler struct { + namespace string + operatorClusterName string + client client.Client + deploy bool + diffwatchImage string +} + +func newMongoDBSearchReconciler(operatorClusterName string, namespace string, client client.Client, deployPods bool, diffwatchImage string) *mongoDBSearchReconciler { + return &mongoDBSearchReconciler{ + namespace: namespace, + operatorClusterName: operatorClusterName, + client: client, + deploy: deployPods, + diffwatchImage: diffwatchImage, + } +} + +func (r *mongoDBSearchReconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { + logger := zap.S() + mdb := searchv1.MongoDBSearch{} + logger.Debugf("Received MongoDBSearch reconcile event: %+v", request) + + operatorClient := r.client + if err := operatorClient.Get(ctx, request.NamespacedName, &mdb); err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, xerrors.Errorf("error getting MongoDBSearch resource %+v: %w", request.NamespacedName, err) + } + + logger.Debugf("Command line equivalent: mdbdebug --type mdbs --context %s --namespace %s --name %s", r.operatorClusterName, request.Namespace, request.Name) + attachCommands, err := debugMongoDBSearch(ctx, request.Namespace, request.Name, r.operatorClusterName, kubernetesClient.NewClient(operatorClient), r.deploy, r.diffwatchImage) + if err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, xerrors.Errorf("error deploying debug for MongoDBSearch resource %+v: %w", request.NamespacedName, err) + } + + if err = createOrUpdateAttachCommandsCM(ctx, logger, request.Namespace, request.Name, "mdbs", attachCommands, operatorClient); err != nil { + return reconcile.Result{RequeueAfter: time.Second * 10}, err + } + + return reconcile.Result{}, nil +} diff --git a/tools/mdbdebug/cmd/mdbdebug/templates.go b/tools/mdbdebug/cmd/mdbdebug/templates.go new file mode 100644 index 000000000..c5add6c4a --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/templates.go @@ -0,0 +1,50 @@ +package main + +import ( + "embed" 
+ "fmt" + "strings" + "sync" + "text/template" +) + +//go:embed templates/* +var templateFS embed.FS + +type TemplateData struct { + Namespace string + ResourceName string + ResourceType string + StsName string + PodName string + VolumeName string + PodIdx int + ClusterIdx int + ShortName string + TLSEnabled bool + PodFQDN string + StaticArch bool + ContainerName string + MongoDBCommunity bool + BaseLogDir string +} + +var once sync.Once +var tmpl *template.Template + +func renderTemplate(templateName string, data TemplateData) (string, error) { + once.Do(func() { + var err error + tmpl, err = template.ParseFS(templateFS, fmt.Sprintf("templates/*.tpl")) + if err != nil { + panic(err) + } + }) + + str := strings.Builder{} + if err := tmpl.ExecuteTemplate(&str, templateName, data); err != nil { + return "", err + } + + return str.String(), nil +} diff --git a/tools/mdbdebug/cmd/mdbdebug/templates/appdb_entrypoint.sh.tpl b/tools/mdbdebug/cmd/mdbdebug/templates/appdb_entrypoint.sh.tpl new file mode 100644 index 000000000..78c8d2df4 --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/templates/appdb_entrypoint.sh.tpl @@ -0,0 +1,83 @@ +#!/usr/bin/env bash + +set -Eeou pipefail + +{{ template "common.sh.tpl" }} + +set -x + +tls_enabled="{{.TLSEnabled}}" +static_arch="{{.StaticArch}}" +container_name="{{.ContainerName}}" +mongosh_container_name="mongodb-agent" + +if [[ "${static_arch}" == "true" ]]; then + mongosh_path='/bin/mongosh' +else + mongosh_path='/var/lib/mongodb-mms-automation/mongosh-linux-x86_64-2.2.4/bin/mongosh' +fi + +sigterm() { + exit 0 +} +trap sigterm SIGTERM + +export LANG=en_GB.UTF-8 +tmuxp load -d -y "/scripts/session.yaml" + +base_log_dir="{{.BaseLogDir}}" +base_dir="${base_log_dir}/mdb-debug" +mongod_container_log_file="${base_log_dir}/mongod_container.log" +agent_container_log_file="${base_log_dir}/agent_container.log" + +cluster_config_file="${base_dir}/ac/cluster-config.json" +health_file="${base_dir}/health/health.json" +pod_file="${base_dir}/pod/pod.json" +sts_file="${base_dir}/sts/sts.json" +readiness_file="${base_dir}/readiness/readiness.log.json" +state_file="${base_dir}/state/state.json" +mongod_config_file="${base_dir}/mongod_config/config.json" +cr_file="${base_dir}/cr/cr.json" + +# we have to wait for the db pod to create /data/logs otherwise we create it with incorrect permissions +while [ ! -d "${base_log_dir}" ]; do + echo "Waiting for ${base_log_dir} to be initialized by the db pod" + sleep 1 +done + +mkdir -p "$(dirname "${cluster_config_file}")" +mkdir -p "$(dirname "${health_file}")" +mkdir -p "$(dirname "${pod_file}")" +mkdir -p "$(dirname "${sts_file}")" +mkdir -p "$(dirname "${readiness_file}")" +mkdir -p "$(dirname "${mongod_config_file}")" +mkdir -p "$(dirname "${cr_file}")" + +tail_pod_log "{{.Namespace}}" "{{.PodName}}" "mongod" "${mongod_container_log_file}" & +tail_pod_log "{{.Namespace}}" "{{.PodName}}" "mongodb-agent" "${agent_container_log_file}" & + +set +e +while true; do + kubectl_get_json "{{.Namespace}}" "{{.ResourceType}}" "{{.ResourceName}}" "${cr_file}" 2>&1 | prepend "get_{{.ResourceType}}_cr" + + kubectl exec -n "{{.Namespace}}" -c "mongodb-agent" "{{.PodName}}" -- cat /var/log/mongodb-mms-automation/healthstatus/agent-health-status.json | jq . 
>${health_file}.tmp + mv ${health_file}.tmp ${health_file} + + kubectl get pod -n "{{.Namespace}}" "{{.PodName}}" -o json >${pod_file}.tmp + mv ${pod_file}.tmp ${pod_file} + + kubectl get sts -n "{{.Namespace}}" "{{.StsName}}" -o json >${sts_file}.tmp + mv ${sts_file}.tmp ${sts_file} + + tail -n 100 "${base_log_dir}/readiness.log" | jq --color-output -c '.' >"${readiness_file}" + + cp "/data/ac/cluster-config.json" "${cluster_config_file}" + + kubectl_get_state_json "{{.Namespace}}" "{{.ResourceName}}" "${state_file}" + + get_file "{{.Namespace}}" "{{.PodName}}" "mongod" "/data/automation-mongod.conf" "${mongod_config_file}.yaml.tmp" + yq . "${mongod_config_file}.yaml.tmp" -o json >"${mongod_config_file}.tmp" + mv "${mongod_config_file}.tmp" "${mongod_config_file}" + + sleep 1 +done diff --git a/tools/mdbdebug/cmd/mdbdebug/templates/appdb_tmux_session.yaml.tpl b/tools/mdbdebug/cmd/mdbdebug/templates/appdb_tmux_session.yaml.tpl new file mode 100644 index 000000000..86628c3c1 --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/templates/appdb_tmux_session.yaml.tpl @@ -0,0 +1,69 @@ +session_name: mdb-debug +global_options: + history-limit: 50000 + mouse: on + pane-border-status: top + pane-border-format: "#{pane_index}: #{pane_title}" +window_options: + remain-on-exit: on +shell_command_before: + - | + function retry_cmd() { + local cmd="$1" + local delay="${2:-3}" + + while true; do + eval "${cmd}" + echo "Retrying... ${cmd}" + sleep "${delay}" + done + } +windows: + - window_name: json + layout: tiled + panes: + - shell_command: + - tmux select-pane -t 0.0 -T '{{.ShortName}} (CR)' + - retry_cmd "diffwatch --file {{.BaseLogDir}}/mdb-debug/cr/cr.json --destDir {{.BaseLogDir}}/mdb-debug/cr -C=5 | tee -a {{.BaseLogDir}}/mdb-debug/cr/cr.log" + - shell_command: + - tmux select-pane -t 0.1 -T '{{.ShortName}} (ac)' + - retry_cmd "diffwatch --file {{.BaseLogDir}}/mdb-debug/ac/cluster-config.json --destDir {{.BaseLogDir}}/mdb-debug/ac -C=3 | tee -a {{.BaseLogDir}}/mdb-debug/ac/ac.log" + - shell_command: + - tmux select-pane -t 0.2 -T '{{.ShortName}} (sts)' + - retry_cmd "diffwatch --file {{.BaseLogDir}}/mdb-debug/sts/sts.json --destDir {{.BaseLogDir}}/mdb-debug/sts -C=3 | tee -a {{.BaseLogDir}}/mdb-debug/sts/sts.log" + - shell_command: + - tmux select-pane -t 0.3 -T '{{.ShortName}} (health)' + - retry_cmd "diffwatch --file {{.BaseLogDir}}/mdb-debug/health/health.json --destDir {{.BaseLogDir}}/mdb-debug/health -C=3 --ignore LastMongoUpTime | tee -a {{.BaseLogDir}}/mdb-debug/health/health.log" + - shell_command: + - tmux select-pane -t 0.4 -T '{{.ShortName}} (pod)' + - retry_cmd "diffwatch --file {{.BaseLogDir}}/mdb-debug/pod/pod.json --destDir {{.BaseLogDir}}/mdb-debug/pod -C=3 | tee -a {{.BaseLogDir}}/mdb-debug/pod/pod.log" + - shell_command: + - tmux select-pane -t 0.5 -T 'readiness.log' + - retry_cmd "less --follow-name -R +F {{.BaseLogDir}}/mdb-debug/readiness/readiness.log.json" + - shell_command: + - tmux select-pane -t 0.6 -T '{{.ShortName}} (state)' + - retry_cmd "diffwatch --file {{.BaseLogDir}}/mdb-debug/state/state.json --destDir {{.BaseLogDir}}/mdb-debug/state | tee -a {{.BaseLogDir}}/mdb-debug/state/state.log" + - shell_command: + - tmux select-pane -t 0.7 -T '{{.ShortName}} (mongod cfg)' + - retry_cmd "diffwatch --file {{.BaseLogDir}}/mdb-debug/mongod_config/config.json --destDir {{.BaseLogDir}}/mdb-debug/mongod_config/ -C=5 | tee -a {{.BaseLogDir}}/mdb-debug/mongod_config/mongod_config.log" + - window_name: logs{{.PodIdx}} + layout: tiled + panes: + - shell_command: + - tmux select-pane -t 
1.0 -T 'automation-agent.log' + - retry_cmd "lnav {{.BaseLogDir}}/automation-agent.log" + - shell_command: + - tmux select-pane -t 1.1 -T 'automation-agent-verbose.log' + - retry_cmd "lnav {{.BaseLogDir}}/automation-agent-verbose.log" + - shell_command: + - tmux select-pane -t 1.2 -T 'mongodb.log' + - retry_cmd "lnav {{.BaseLogDir}}/mongodb.log" + - shell_command: + - tmux select-pane -t 1.3 -T 'readiness.log' + - retry_cmd "lnav {{.BaseLogDir}}/readiness.log" + - shell_command: + - tmux select-pane -t 1.4 -T 'mongod_container.log' + - retry_cmd "lnav {{.BaseLogDir}}/mongod_container.log" + - shell_command: + - tmux select-pane -t 1.5 -T 'agent_container.log' + - retry_cmd "lnav {{.BaseLogDir}}/agent_container.log" diff --git a/tools/mdbdebug/cmd/mdbdebug/templates/common.sh.tpl b/tools/mdbdebug/cmd/mdbdebug/templates/common.sh.tpl new file mode 100644 index 000000000..64414dd95 --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/templates/common.sh.tpl @@ -0,0 +1,156 @@ +export LANG=en_GB.UTF-8 + + +sigterm() { + exit 0 +} + +trap sigterm SIGTERM + +tail_file() { + namespace=$1 + pod_name=$2 + container_name=$3 + pod_file_path=$4 + log_file_path=$5 + + while true; do + kubectl exec -n "${namespace}" -c "${container_name}" "${pod_name}" -- tail -F "${pod_file_path}" >>"${log_file_path}" || { + echo "failed to tail ${pod_name}:${pod_file_path} to ${log_file_path}" + sleep 3 + } + done +} + +exec_cmd() { + namespace=$1 + pod_name=$2 + container_name=$3 + cmd=$4 + + # shellcheck disable=SC2086 + kubectl exec -n "${namespace}" -c "${container_name}" "${pod_name}" -- bash -c "${cmd}" || { + echo "failed to exec in ${pod_name} cmd: ${cmd}" + } +} + +get_file() { + namespace=$1 + pod_name=$2 + container_name=$3 + src_file_path=$4 + dst_file_path=$5 + + kubectl exec -n "${namespace}" "${pod_name}" -c "${container_name}" -- cat "${src_file_path}" > "${dst_file_path}" || { + echo "failed to get file ${pod_name}:${src_file_path} to ${dst_file_path}" + return 1 + } + return 0 +} + +tail_pod_log() { + namespace=$1 + pod_name=$2 + container_name=$3 + log_file_path=$4 + + while true; do + kubectl logs -n "${namespace}" -c "${container_name}" "${pod_name}" --tail=0 -f >>"${log_file_path}" || { + echo "failed to tail logs from ${pod_name} to ${log_file_path}" + sleep 3 + } + done +} + +kubectl_get_json() { + namespace=$1 + resource_type=$2 + resource_name=$3 + file_path=$4 + + kubectl get "${resource_type}" "${resource_name}" -n "${namespace}" -o json >"${file_path}.tmp" 2>"${file_path}.error.tmp" || { + echo "{\"error_message\":\"$(cat "${file_path}.error.tmp")\"}" >"${file_path}.tmp" + } + mv "${file_path}.tmp" "${file_path}" +} + +kubectl_get_state_json() { + namespace=$1 + resource_name=$2 + file_path=$3 + + kubectl get cm "${resource_name}-state" -n "${namespace}" -o json | jq -r '.data.state' | jq . >"${file_path}.tmp" 2>"${file_path}.error.tmp" || { + echo "{\"error_message\":\"$(cat "${file_path}.error.tmp")\"}" >"${file_path}.tmp" + } + mv "${file_path}.tmp" "${file_path}" +} + +get_om_creds() { + namespace=$1 + secret=$2 + + kubectl get secret "${secret}" -n "${namespace}" -o json | jq -r '.data | with_entries(.value |= @base64d) | if .user then "\(.user):\(.publicApiKey)" else "\(.publicKey):\(.privateKey)" end' +} + +get_ac() { + namespace=$1 + base_url=$2 + project_id=$3 + agent_api_key=$4 + + curl -s -k -u "${project_id}:${agent_api_key}" "${base_url}/agents/api/automation/conf/v1/${project_id}?debug=true" 2>/dev/null | jq . 
+}
+
+get_project_id() {
+  base_url=$1
+  om_creds=$2
+  project_name=$3
+
+  curl -s -k -u "${om_creds}" --digest "${base_url}/api/public/v1.0/groups/byName/${project_name}" 2>/dev/null | jq -r .id
+}
+
+get_project_data() {
+  namespace=$1
+  resource=$2
+
+  resource_json=$(kubectl get -n "${namespace}" mdb "${resource}" -o json)
+  project_configmap=$(jq -r 'if .spec.cloudManager then .spec.cloudManager.configMapRef.name else .spec.opsManager.configMapRef.name end' <<<"${resource_json}")
+
+  creds_secret=$(jq -r '.spec.credentials' <<<"${resource_json}")
+  om_creds=$(get_om_creds "${namespace}" "${creds_secret}")
+
+  project_configmap_json=$(kubectl get configmap "${project_configmap}" -n "${namespace}" -o json)
+  org_id=$(jq -r '.data.orgId' <<<"${project_configmap_json}")
+  project_name=$(jq -r '.data.projectName' <<<"${project_configmap_json}")
+  base_url=$(jq -r '.data.baseUrl' <<<"${project_configmap_json}")
+
+  project_id=$(get_project_id "${base_url}" "${om_creds}" "${project_name}")
+
+  group_secret_json=$(kubectl get -n "${namespace}" secret "${project_id}-group-secret" -o json)
+  agent_api_key=$(jq -r '.data | with_entries(.value |= @base64d) | .agentApiKey' <<<"${group_secret_json}")
+
+  echo "${org_id}|${project_name}|${base_url}|${project_id}|${agent_api_key}"
+}
+
+get_project_data2() {
+  namespace=$1
+  resource_name=$2
+  pod_name=$3
+
+  project_id=$(kubectl get -n "${namespace}" pod "${pod_name}" -o json | jq -r '.spec.containers[] | select(.name == "mongodb-enterprise-database" or .name == "mongodb-agent").env[] | select(.name == "GROUP_ID") | .value' | head -n 1)
+  base_url=$(kubectl get -n "${namespace}" pod "${pod_name}" -o json | jq -r '.spec.containers[] | select(.name == "mongodb-enterprise-database" or .name == "mongodb-agent").env[] | select(.name == "BASE_URL") | .value' | head -n 1)
+  group_secret_json=$(kubectl get -n "${namespace}" secret "${project_id}-group-secret" -o json)
+  agent_api_key=$(jq -r '.data | with_entries(.value |= @base64d) | .agentApiKey' <<<"${group_secret_json}")
+
+  # the org id is not available from the pod environment; emit an empty field
+  # to keep a stable "|"-separated output shape
+  org_id=""
+
+  echo "${org_id}|${base_url}|${project_id}|${agent_api_key}"
+}
+
+trim_config() {
+  cfg_file=$1
+  jq '. 
| del(.mongoshVersion, .mongoDbVersions, .mongoDbToolsVersion, .clientPIT)' <"${cfg_file}" +} + +prepend() { + prefix=$1 + awk -v prefix="${prefix}" '{printf "%s: %s\n", prefix, $0}' +} diff --git a/tools/mdbdebug/cmd/mdbdebug/templates/mongos_entrypoint.sh.tpl b/tools/mdbdebug/cmd/mdbdebug/templates/mongos_entrypoint.sh.tpl new file mode 100644 index 000000000..60fd0827a --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/templates/mongos_entrypoint.sh.tpl @@ -0,0 +1,113 @@ +#!/usr/bin/env bash + +set -Eeou pipefail + +set -x + +{{ template "common.sh.tpl" }} + +tls_enabled="{{.TLSEnabled}}" +static_arch="{{.StaticArch}}" +container_name="{{.ContainerName}}" +mongosh_container_name="mongodb-enterprise-database" + +if [[ "${static_arch}" == "true" ]]; then + mongosh_path='/bin/mongosh' +else + mongosh_path='/var/lib/mongodb-mms-automation/mongosh-linux-x86_64-2.2.4/bin/mongosh' +fi + + +tmuxp load -d -y "/scripts/session.yaml" +base_dir="/data/logs/mdb-debug" + +logs_dir="${base_dir}/logs" +cr_file="${base_dir}/mdb/mdb.json" +pod_file="${base_dir}/pod/pod.json" +sts_file="${base_dir}/sts/sts.json" +health_file="${base_dir}/health/health.json" +readiness_file="${base_dir}/readiness/readiness.log.json" +cluster_config_file="${base_dir}/ac/cluster-config.json" +cluster_config_tmp_file="${base_dir}/ac_tmp/cluster-config.json" +sh_status_file="${base_dir}/sh/status.json" +mongod_config_file="${base_dir}/mongod_config/config.json" + +mkdir -p "${logs_dir}" +mkdir -p "$(dirname "${cr_file}")" +mkdir -p "$(dirname "${pod_file}")" +mkdir -p "$(dirname "${sts_file}")" +mkdir -p "$(dirname "${cluster_config_file}")" +mkdir -p "$(dirname "${cluster_config_tmp_file}")" +mkdir -p "$(dirname "${sh_status_file}")" +mkdir -p "$(dirname "${mongod_config_file}")" + +# TODO read env vars with log file names +pod_log_file="pod.log" +mongo_log_file="mongodb.log" +automation_stderr_log_file="automation-agent-stderr.log" +automation_verbose_log_file="automation-agent-verbose.log" +automation_log_file="automation-agent.log" +backup_agent="backup-agent.log" +monitoring_log_file="monitoring-agent.log" + +tail_pod_log "{{.Namespace}}" "{{.PodName}}" "${container_name}" "${logs_dir}/${pod_log_file}" & +tail_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/var/log/mongodb-mms-automation/${mongo_log_file}" "${logs_dir}/${mongo_log_file}" & +tail_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/var/log/mongodb-mms-automation/${automation_stderr_log_file}" "${logs_dir}/${automation_stderr_log_file}" & +tail_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/var/log/mongodb-mms-automation/${automation_verbose_log_file}" "${logs_dir}/${automation_verbose_log_file}" & +tail_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/var/log/mongodb-mms-automation/${automation_log_file}" "${logs_dir}/${automation_log_file}" & +tail_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/var/log/mongodb-mms-automation/${backup_agent}" "${logs_dir}/${backup_agent}" & +tail_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/var/log/mongodb-mms-automation/${monitoring_log_file}" "${logs_dir}/${monitoring_log_file}" & + +org_id="" +project_name="" +base_url="" +project_id="" +agent_api_key="" + +set +e +while true; do + kubectl_get_json "{{.Namespace}}" "mdb" "{{.ResourceName}}" "${cr_file}" + kubectl_get_json "{{.Namespace}}" "pod" "{{.PodName}}" "${pod_file}" + kubectl_get_json "{{.Namespace}}" "sts" "{{.StsName}}" "${sts_file}" + + mongos_conf_path=$(exec_cmd "{{.Namespace}}" "{{.PodName}}" 
"${container_name}" 'ls /var/lib/mongodb-mms-automation/workspace/*.conf | head -n 1') + get_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "${mongos_conf_path}" "${mongod_config_file}.yaml.tmp" + yq . "${mongod_config_file}.yaml.tmp" -o json >"${mongod_config_file}.tmp" + mv "${mongod_config_file}.tmp" "${mongod_config_file}" + + mongod_port=27017 + if [[ -f "${mongod_config_file}" ]]; then + mongod_port=$(jq -r '.net.port' "${mongod_config_file}") + fi + + get_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/var/log/mongodb-mms-automation/agent-health-status.json" "${health_file}.tmp" + mv "${health_file}.tmp" "${health_file}" + + get_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/var/log/mongodb-mms-automation/readiness.log" "${readiness_file}.tmp" + mv "${readiness_file}.tmp" "${readiness_file}" + + if [[ -z ${agent_api_key} ]]; then + IFS='|' read -r org_id project_name base_url project_id agent_api_key <<<"$(get_project_data "${namespace}" "{{.ResourceName}}")" + echo "org_id: ${org_id}" + echo "project_name: ${project_name}" + echo "base_url: ${base_url}" + echo "project_id: ${project_id}" + echo "agent_api_key: ${agent_api_key}" + fi + + if [[ -n ${agent_api_key} ]]; then + get_ac "${namespace}" "${base_url}" "${project_id}" "${agent_api_key}" >"${cluster_config_file}.tmp" + mv "${cluster_config_file}.tmp" "${cluster_config_file}" + fi + get_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/tmp/mongodb-mms-automation-cluster-backup.json" "${cluster_config_tmp_file}" + + tls_args="" + if [[ "${tls_enabled}" == "true" ]]; then + tls_args="--tls --tlsCAFile /mongodb-automation/tls/ca/ca-pem" + fi + cmd="${mongosh_path} --host {{.PodName}}.{{.ResourceName}}-svc.{{.Namespace}}.svc.cluster.local --port ${mongod_port} ${tls_args} --eval 'JSON.stringify(sh.status())' --quiet" + exec_cmd "{{.Namespace}}" "{{.PodName}}" "${mongosh_container_name}" "${cmd}" | grep -v "Warning: Could not access" >"${sh_status_file}.tmp" + mv "${sh_status_file}.tmp" "${sh_status_file}" + + sleep 1 +done diff --git a/tools/mdbdebug/cmd/mdbdebug/templates/mongos_tmux_session.yaml.tpl b/tools/mdbdebug/cmd/mdbdebug/templates/mongos_tmux_session.yaml.tpl new file mode 100644 index 000000000..7720e1552 --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/templates/mongos_tmux_session.yaml.tpl @@ -0,0 +1,72 @@ +session_name: mdb-debug +global_options: + history-limit: 50000 + mouse: on + pane-border-status: top + pane-border-format: "#{pane_index}: #{pane_title}" +window_options: + remain-on-exit: on +shell_command_before: + - | + function retry_cmd() { + local cmd="$1" + local delay="${2:-3}" + + while true; do + eval "$cmd" + echo "Retrying..." 
+ sleep "$delay" + done + } +windows: + - window_name: json + layout: tiled + panes: + - shell_command: + - tmux select-pane -t 0.0 -T '{{.ShortName}} (mdb)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/mdb/mdb.json --destDir /data/logs/mdb-debug/mdb -C=5 | tee -a /data/logs/mdb-debug/mdb/mdb.log" + - shell_command: + - tmux select-pane -t 0.1 -T '{{.ShortName}} (sts)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/sts/sts.json --destDir /data/logs/mdb-debug/sts -C=5 | tee -a /data/logs/mdb-debug/sts/sts.log" + - shell_command: + - tmux select-pane -t 0.2 -T '{{.ShortName}} (pod)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/pod/pod.json --destDir /data/logs/mdb-debug/pod -C=5 | tee -a /data/logs/mdb-debug/pod/pod.log" + - shell_command: + - tmux select-pane -t 0.3 -T '{{.ShortName}} (ac)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/ac/cluster-config.json --destDir /data/logs/mdb-debug/ac -C=5 | tee -a /data/logs/mdb-debug/ac/ac.log" + - shell_command: + - tmux select-pane -t 0.4 -T '{{.ShortName}} (ac tmp)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/ac_tmp/cluster-config.json --destDir /data/logs/mdb-debug/ac_tmp -C=5 | tee -a /data/logs/mdb-debug/ac_tmp/ac_tmp.log" + - shell_command: + - tmux select-pane -t 0.6 -T 'readiness.log' + - retry_cmd "less --follow-name -R +F /data/logs/mdb-debug/readiness/readiness.log.json | tee -a /data/logs/mdb-debug/readiness/terminal_output.log" + - shell_command: + - tmux select-pane -t 0.5 -T '{{.ShortName}} (health)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/health/health.json --destDir /data/logs/mdb-debug/health -C=5 --ignore LastMongoUpTime | tee -a /data/logs/mdb-debug/health/health.log" + - shell_command: + - tmux select-pane -t 0.6 -T '{{.ShortName}} (sh status)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/sh/status.json --destDir /data/logs/mdb-debug/sh/ -C=5 | tee -a /data/logs/mdb-debug/sh/sh_status.log" + - window_name: logs{{.PodIdx}} + layout: tiled + panes: + - shell_command: + - tmux select-pane -t 1.0 -T 'pod log' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/pod.log" + - shell_command: + - tmux select-pane -t 1.1 -T 'automation-agent-verbose.log' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/automation-agent-verbose.log" + - shell_command: + - tmux select-pane -t 1.2 -T 'automation-agent-stderr.log' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/automation-agent-stderr.log" + - shell_command: + - tmux select-pane -t 1.3 -T 'mongodb.log' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/mongodb.log" + - shell_command: + - tmux select-pane -t 1.4 -T 'automation-agent.log' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/automation-agent.log" + - shell_command: + - tmux select-pane -t 1.5 -T 'backup-agent.log' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/backup-agent.log" + - shell_command: + - tmux select-pane -t 1.6 -T 'monitoring-agent.log' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/monitoring-agent.log" diff --git a/tools/mdbdebug/cmd/mdbdebug/templates/mongot_entrypoint.sh.tpl b/tools/mdbdebug/cmd/mdbdebug/templates/mongot_entrypoint.sh.tpl new file mode 100644 index 000000000..9de6fe5fb --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/templates/mongot_entrypoint.sh.tpl @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +set -Eeou pipefail + +{{ template "common.sh.tpl" }} + +set -x + +container_name="{{.ContainerName}}" + +sigterm() { + exit 0 +} +trap sigterm SIGTERM + 
+export LANG=en_GB.UTF-8
+tmuxp load -d -y "/scripts/session.yaml"
+
+base_log_dir="{{.BaseLogDir}}"
+base_dir="${base_log_dir}/mdb-debug"
+container_log_file="${base_log_dir}/mongot_container.log"
+
+cr_file="${base_dir}/cr/cr.json"
+mongot_config_file="${base_dir}/config/config.json"
+pod_file="${base_dir}/pod/pod.json"
+sts_file="${base_dir}/sts/sts.json"
+
+mkdir -p "$(dirname "${cr_file}")"
+mkdir -p "$(dirname "${pod_file}")"
+mkdir -p "$(dirname "${sts_file}")"
+mkdir -p "$(dirname "${mongot_config_file}")"
+
+tail_pod_log "{{.Namespace}}" "{{.PodName}}" "${container_name}" "${container_log_file}" &
+
+set +e
+while true; do
+ kubectl_get_json "{{.Namespace}}" "{{.ResourceType}}" "{{.ResourceName}}" "${cr_file}" 2>&1 | prepend "get_{{.ResourceType}}_cr"
+
+ kubectl get pod -n "{{.Namespace}}" "{{.PodName}}" -o json >"${pod_file}.tmp"
+ mv "${pod_file}.tmp" "${pod_file}"
+
+ kubectl get sts -n "{{.Namespace}}" "{{.StsName}}" -o json >"${sts_file}.tmp"
+ mv "${sts_file}.tmp" "${sts_file}"
+
+ if get_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/mongot/config/config.yml" "${mongot_config_file}.yaml.tmp"; then
+ yq . "${mongot_config_file}.yaml.tmp" -o json >"${mongot_config_file}.tmp"
+ mv "${mongot_config_file}.tmp" "${mongot_config_file}"
+ fi
+
+ sleep 1
+done
diff --git a/tools/mdbdebug/cmd/mdbdebug/templates/mongot_tmux_session.yaml.tpl b/tools/mdbdebug/cmd/mdbdebug/templates/mongot_tmux_session.yaml.tpl
new file mode 100644
index 000000000..5254746b3
--- /dev/null
+++ b/tools/mdbdebug/cmd/mdbdebug/templates/mongot_tmux_session.yaml.tpl
@@ -0,0 +1,39 @@
+session_name: mdb-debug
+global_options:
+ history-limit: 50000
+ mouse: on
+ pane-border-status: top
+ pane-border-format: "#{pane_index}: #{pane_title}"
+window_options:
+ remain-on-exit: on
+shell_command_before:
+ - |
+ function retry_cmd() {
+ local cmd="$1"
+ local delay="${2:-3}"
+
+ while true; do
+ eval "$cmd"
+ echo "Retrying..."
+ sleep "$delay"
+ done
+ }
+windows:
+ - window_name: json
+ layout: tiled
+ panes:
+ - shell_command:
+ - tmux select-pane -t 0.0 -T '{{.ShortName}} (CR)'
+ - retry_cmd "diffwatch --file {{.BaseLogDir}}/mdb-debug/cr/cr.json --destDir {{.BaseLogDir}}/mdb-debug/cr -C=5"
+ - shell_command:
+ - tmux select-pane -t 0.1 -T '{{.ShortName}} (sts)'
+ - retry_cmd "diffwatch --file {{.BaseLogDir}}/mdb-debug/sts/sts.json --destDir {{.BaseLogDir}}/mdb-debug/sts -C=3"
+ - shell_command:
+ - tmux select-pane -t 0.2 -T '{{.ShortName}} (mongot cfg)'
+ - retry_cmd "diffwatch --file {{.BaseLogDir}}/mdb-debug/config/config.json --destDir {{.BaseLogDir}}/mdb-debug/config -C=3"
+ - shell_command:
+ - tmux select-pane -t 0.3 -T '{{.ShortName}} (pod)'
+ - retry_cmd "diffwatch --file {{.BaseLogDir}}/mdb-debug/pod/pod.json --destDir {{.BaseLogDir}}/mdb-debug/pod -C=3"
+ - shell_command:
+ - tmux select-pane -t 0.4 -T '{{.ShortName}} (pod log)'
+ - retry_cmd "lnav {{.BaseLogDir}}/mongot_container.log"
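All of these tmux session templates are consumed the same way by the entrypoint scripts. A minimal sketch of that flow, runnable by hand inside the debug container (the session file path is illustrative; mdb-debug is the session_name set by every template here):

  tmuxp load -d -y /scripts/session.yaml   # start the session detached, answering prompts with yes
  tmux attach -t mdb-debug                 # attach to inspect the panes interactively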
diff --git a/tools/mdbdebug/cmd/mdbdebug/templates/om_backup_daemon_entrypoint.sh.tpl b/tools/mdbdebug/cmd/mdbdebug/templates/om_backup_daemon_entrypoint.sh.tpl
new file mode 100644
index 000000000..021809118
--- /dev/null
+++ b/tools/mdbdebug/cmd/mdbdebug/templates/om_backup_daemon_entrypoint.sh.tpl
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+
+set -Eeou pipefail
+
+set -x
+
+export LANG=en_GB.UTF-8
+
+sigterm() {
+ exit 0
+}
+trap sigterm SIGTERM
+
+tail_file() {
+ local pod_name=$1
+ local container_name=$2
+ local pod_file_path=$3
+ local log_file_path=$4
+
+ while true; do
+ kubectl exec -n "{{.Namespace}}" -c "${container_name}" "{{.PodName}}" -- tail -F "${pod_file_path}" >> "${log_file_path}" || {
+ echo "failed to tail ${pod_name}:${pod_file_path} to ${log_file_path}"
+ sleep 3
+ }
+ done
+}
+
+tail_pod_log() {
+ local pod_name=$1
+ local container_name=$2
+ local log_file_path=$3
+
+ while true; do
+ kubectl logs -n "{{.Namespace}}" -c "${container_name}" "{{.PodName}}" -f >> "${log_file_path}" || {
+ echo "failed to tail logs from ${pod_name} to ${log_file_path}"
+ sleep 3
+ }
+ done
+}
+
+kubectl_get_json() {
+ local resource_type=$1
+ local resource_name=$2
+ local file_path=$3
+
+ kubectl get "${resource_type}" "${resource_name}" -n "{{.Namespace}}" -o json > "${file_path}.tmp" 2>"${file_path}.error.tmp" || {
+ echo "{\"error_message\":\"$(cat "${file_path}.error.tmp")\"}" > "${file_path}.tmp"
+ }
+ mv "${file_path}.tmp" "${file_path}"
+}
+
+tmuxp load -d -y "/scripts/session.yaml"
+
+base_dir="/data/logs/mdb-debug"
+
+om_logs_dir="${base_dir}/logs"
+om_cr_file="${base_dir}/om/om.json"
+om_pod_file="${base_dir}/pod/pod.json"
+om_sts_file="${base_dir}/sts/sts.json"
+
+daemon_startup_log_file="daemon-startup.log"
+daemon_log_file="daemon.log"
+pod_log="pod.log"
+
+mkdir -p "${om_logs_dir}"
+mkdir -p "$(dirname "${om_cr_file}")"
+mkdir -p "$(dirname "${om_pod_file}")"
+mkdir -p "$(dirname "${om_sts_file}")"
+
+tail_pod_log "{{.PodName}}" "mongodb-backup-daemon" "${om_logs_dir}/${pod_log}" &
+tail_file "{{.PodName}}" "mongodb-backup-daemon" "/mongodb-ops-manager/logs/${daemon_startup_log_file}" "${om_logs_dir}/${daemon_startup_log_file}" &
+tail_file "{{.PodName}}" "mongodb-backup-daemon" "/mongodb-ops-manager/logs/${daemon_log_file}" "${om_logs_dir}/${daemon_log_file}" &
+
+set +e
+while true; do
+ kubectl_get_json "om" "{{.ResourceName}}" "${om_cr_file}"
+ kubectl_get_json "pod" "{{.PodName}}" "${om_pod_file}"
+ kubectl_get_json "sts" "{{.StsName}}" "${om_sts_file}"
+
+ sleep 1
+done
diff --git
a/tools/mdbdebug/cmd/mdbdebug/templates/om_backup_daemon_tmux_session.yaml.tpl b/tools/mdbdebug/cmd/mdbdebug/templates/om_backup_daemon_tmux_session.yaml.tpl new file mode 100644 index 000000000..9148fdbfa --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/templates/om_backup_daemon_tmux_session.yaml.tpl @@ -0,0 +1,42 @@ +session_name: mdb-debug +global_options: + history-limit: 50000 + mouse: on + pane-border-status: top + pane-border-format: "#{pane_index}: #{pane_title}" +window_options: + remain-on-exit: on +shell_command_before: + - | + function retry_cmd() { + local cmd="$1" + local delay="${2:-3}" + + while true; do + eval "$cmd" + echo "Retrying..." + sleep "$delay" + done + } +windows: + - window_name: json + layout: even-horizontal + panes: + - shell_command: + - tmux select-pane -t 0.0 -T '{{.ShortName}} (pod)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/pod/pod.json --destDir /data/logs/mdb-debug/pod -C=5 | tee -a /data/logs/mdb-debug/pod/pod.log" + - shell_command: + - tmux select-pane -t 0.1 -T '{{.ShortName}} (sts)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/sts/sts.json --destDir /data/logs/mdb-debug/sts -C=5 | tee -a /data/logs/mdb-debug/sts/sts.log" + - shell_command: + - tmux select-pane -t 0.2 -T '{{.ShortName}} (pod log)' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/pod.log | tee -a /data/logs/mdb-debug/logs/pod_viewer.log" + - window_name: logs{{.PodIdx}} + layout: even-horizontal + panes: + - shell_command: + - tmux select-pane -t 1.0 -T 'daemon-startup.log' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/daemon-startup.log | tee -a /data/logs/mdb-debug/logs/daemon-startup_viewer.log" + - shell_command: + - tmux select-pane -t 1.1 -T 'daemon.log' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/daemon.log | tee -a /data/logs/mdb-debug/logs/daemon_viewer.log" diff --git a/tools/mdbdebug/cmd/mdbdebug/templates/om_entrypoint.sh.tpl b/tools/mdbdebug/cmd/mdbdebug/templates/om_entrypoint.sh.tpl new file mode 100644 index 000000000..f0f8eaf32 --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/templates/om_entrypoint.sh.tpl @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +set -Eeou pipefail + +set -x + +{{ template "common.sh.tpl" }} + +tls_enabled="{{.TLSEnabled}}" +static_arch="{{.StaticArch}}" +container_name="{{.ContainerName}}" + +tmuxp load -d -y "/scripts/session.yaml" +base_dir="/data/logs/mdb-debug" + +logs_dir="${base_dir}/logs" +cr_file="${base_dir}/om/om.json" +pod_file="${base_dir}/pod/pod.json" +sts_file="${base_dir}/sts/sts.json" +state_file="${base_dir}/state/state.json" + +mms_migration_log_file="mms-migration.log" +mms_access_log_file="mms0-access.log" +mms_startup_log_file="mms0-startup.log" +mms_log_file="mms0.log" +pod_log="pod.log" + +set +e + +mkdir -p "${logs_dir}" +mkdir -p "$(dirname "${cr_file}")" +mkdir -p "$(dirname "${pod_file}")" +mkdir -p "$(dirname "${sts_file}")" +mkdir -p "$(dirname "${state_file}")" + +echo "Starting tailing" +(tail_pod_log "{{.Namespace}}" "{{.PodName}}" "${container_name}" "${logs_dir}/${pod_log}" 2>&1 | prepend "tail_pod_log") & +(tail_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/mongodb-ops-manager/logs/${mms_migration_log_file}" "${logs_dir}/${mms_migration_log_file}" 2>&1 | prepend "tail_file_mms_migration_log_file") & +(tail_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/mongodb-ops-manager/logs/${mms_access_log_file}" "${logs_dir}/${mms_access_log_file}" 2>&1 | prepend "tail_file_mms_access_log_file") & +(tail_file "{{.Namespace}}" 
"{{.PodName}}" "${container_name}" "/mongodb-ops-manager/logs/${mms_startup_log_file}" "${logs_dir}/${mms_startup_log_file}" 2>&1 | prepend "tail_file_mms_startup_log_file") & +(tail_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/mongodb-ops-manager/logs/${mms_log_file}" "${logs_dir}/${mms_log_file}" 2>&1 | prepend "tail_file_mms_log_file") & + + +while true; do + echo "loop iteration" + kubectl_get_json "{{.Namespace}}" "om" "{{.ResourceName}}" "${cr_file}" 2>&1 | prepend "get_json_om" + kubectl_get_json "{{.Namespace}}" "pod" "{{.PodName}}" "${pod_file}" 2>&1 | prepend "get_json_pod" + kubectl_get_json "{{.Namespace}}" "sts" "{{.StsName}}" "${sts_file}" 2>&1 | prepend "get_json_sts" + kubectl_get_state_json "{{.Namespace}}" "{{.ResourceName}}" "${state_file}" 2>&1 | prepend "get_json_state" + echo "sleeping..." + sleep 1 +done diff --git a/tools/mdbdebug/cmd/mdbdebug/templates/om_tmux_session.yaml.tpl b/tools/mdbdebug/cmd/mdbdebug/templates/om_tmux_session.yaml.tpl new file mode 100644 index 000000000..9f9786fe7 --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/templates/om_tmux_session.yaml.tpl @@ -0,0 +1,57 @@ +session_name: mdb-debug +global_options: + history-limit: 50000 + mouse: on + pane-border-status: top + pane-border-format: "#{pane_index}: #{pane_title}" +window_options: + remain-on-exit: on +shell_command_before: + - | + function retry_cmd() { + local cmd="$1" + local delay="${2:-3}" + + while true; do + eval "$cmd" + echo "Retrying..." + sleep "$delay" + done + } +windows: + - window_name: json + layout: tiled + panes: + - shell_command: + - tmux select-pane -t 0.0 -T '{{.ShortName}} (CR)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/om/om.json --destDir /data/logs/mdb-debug/om -C=5 | tee -a /data/logs/mdb-debug/om/om.log" + - shell_command: + - tmux select-pane -t 0.1 -T '{{.ShortName}} (pod)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/pod/pod.json --destDir /data/logs/mdb-debug/pod -C=5 | tee -a /data/logs/mdb-debug/pod/pod.log" + - shell_command: + - tmux select-pane -t 0.2 -T '{{.ShortName}} (sts)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/sts/sts.json --destDir /data/logs/mdb-debug/sts -C=5 | tee -a /data/logs/mdb-debug/sts/sts.log" + - shell_command: + - tmux select-pane -t 0.3 -T '{{.ShortName}} (mms-conf)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/pod_conf/conf-mms.json --destDir /data/logs/mdb-debug/pod_conf -A=20 -B=20 | tee -a /data/logs/mdb-debug/pod_conf/conf-mms.log" + - shell_command: + - tmux select-pane -t 0.4 -T '{{.ShortName}} (pod log)' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/pod.log | tee -a /data/logs/mdb-debug/logs/pod_viewer.log" + - shell_command: + - tmux select-pane -t 0.5 -T '{{.ShortName}} (state)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/state/state.json --destDir /data/logs/mdb-debug/state | tee -a /data/logs/mdb-debug/state/state.log" + - window_name: logs{{.PodIdx}} + layout: tiled + panes: + - shell_command: + - tmux select-pane -t 1.0 -T 'mms-migration.log' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/mms-migration.log" + - shell_command: + - tmux select-pane -t 1.1 -T 'mms0-access.log' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/mms0-access.log" + - shell_command: + - tmux select-pane -t 1.2 -T 'mms0-startup.log' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/mms0-startup.log" + - shell_command: + - tmux select-pane -t 1.3 -T 'mms0.log' + - retry_cmd "less --follow-name +F 
/data/logs/mdb-debug/logs/mms0.log"
diff --git a/tools/mdbdebug/cmd/mdbdebug/templates/replicaset_entrypoint.sh.tpl b/tools/mdbdebug/cmd/mdbdebug/templates/replicaset_entrypoint.sh.tpl
new file mode 100644
index 000000000..b5e2a5233
--- /dev/null
+++ b/tools/mdbdebug/cmd/mdbdebug/templates/replicaset_entrypoint.sh.tpl
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+
+set -Eeou pipefail
+
+set -x
+
+{{ template "common.sh.tpl" }}
+
+tls_enabled="{{.TLSEnabled}}"
+static_arch="{{.StaticArch}}"
+container_name="{{.ContainerName}}"
+pod_fqdn="{{.PodFQDN}}"
+mongosh_container_name="mongodb-enterprise-database"
+
+if [[ "${static_arch}" == "true" ]]; then
+ mongosh_path='/bin/mongosh'
+else
+ mongosh_path='/var/lib/mongodb-mms-automation/mongosh-linux-x86_64-2.2.4/bin/mongosh'
+fi
+
+tmuxp load -d -y "/scripts/session.yaml"
+base_log_dir="/data/logs"
+base_dir="/data/logs/mdb-debug"
+
+logs_dir="${base_dir}/logs"
+cr_file="${base_dir}/mdb/mdb.json"
+pod_file="${base_dir}/pod/pod.json"
+sts_file="${base_dir}/sts/sts.json"
+health_file="${base_dir}/health/health.json"
+readiness_file="${base_dir}/readiness/readiness.log.json"
+cluster_config_file="${base_dir}/ac/cluster-config.json"
+cluster_config_tmp_file="${base_dir}/ac_tmp/cluster-config.json"
+rs_config_file="${base_dir}/rs/config.json"
+rs_hello_file="${base_dir}/rs_hello/hello.json"
+mongod_config_file="${base_dir}/mongod_config/config.json"
+state_file="${base_dir}/state/state.json"
+
+# we have to wait for the db pod to create /data/logs, otherwise we would create it with incorrect permissions
+while [ ! -d "${base_log_dir}" ]; do
+ echo "Waiting for ${base_log_dir} to be initialized by the db pod"
+ sleep 1
+done
+
+mkdir -p "${logs_dir}"
+mkdir -p "$(dirname "${cr_file}")"
+mkdir -p "$(dirname "${pod_file}")"
+mkdir -p "$(dirname "${sts_file}")"
+mkdir -p "$(dirname "${cluster_config_file}")"
+mkdir -p "$(dirname "${cluster_config_tmp_file}")"
+mkdir -p "$(dirname "${rs_config_file}")"
+mkdir -p "$(dirname "${rs_hello_file}")"
+mkdir -p "$(dirname "${health_file}")"
+mkdir -p "$(dirname "${readiness_file}")"
+mkdir -p "$(dirname "${mongod_config_file}")"
+mkdir -p "$(dirname "${state_file}")"
+
+# TODO read env vars with log file names
+pod_log_file="pod.log"
+
+tail_pod_log "{{.Namespace}}" "{{.PodName}}" "${container_name}" "${logs_dir}/${pod_log_file}" &
+
+org_id=""
+project_name=""
+base_url=""
+project_id=""
+agent_api_key=""
+
+set +e
+while true; do
+ kubectl_get_json "{{.Namespace}}" "mdb" "{{.ResourceName}}" "${cr_file}"
+ kubectl_get_json "{{.Namespace}}" "pod" "{{.PodName}}" "${pod_file}"
+ kubectl_get_json "{{.Namespace}}" "sts" "{{.StsName}}" "${sts_file}"
+
+ get_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/data/automation-mongod.conf" "${mongod_config_file}.yaml.tmp"
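+ # automation-mongod.conf is YAML; convert it to JSON so diffwatch can track it and jq can read .net.port below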
"${mongod_config_file}.yaml.tmp" -o json >"${mongod_config_file}.tmp" + mv "${mongod_config_file}.tmp" "${mongod_config_file}" + + mongod_port=27017 + if [[ -f "${mongod_config_file}" ]]; then + mongod_port=$(jq -r '.net.port' "${mongod_config_file}") + fi + + get_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/var/log/mongodb-mms-automation/readiness.log" "${readiness_file}.tmp" + mv "${readiness_file}.tmp" "${readiness_file}" + + if [[ -z ${agent_api_key} ]]; then + IFS='|' read -r org_id base_url project_id agent_api_key <<<"$(get_project_data2 "${namespace}" "{{.ResourceName}}" "{{.PodName}}")" + echo "org_id: ${org_id}" + echo "base_url: ${base_url}" + echo "project_id: ${project_id}" + echo "agent_api_key: ${agent_api_key}" + fi + + if [[ -n ${agent_api_key} ]]; then + get_ac "${namespace}" "${base_url}" "${project_id}" "${agent_api_key}" >"${cluster_config_file}.tmp" + mv "${cluster_config_file}.tmp" "${cluster_config_file}" + fi + get_file "{{.Namespace}}" "{{.PodName}}" "${container_name}" "/tmp/mongodb-mms-automation-cluster-backup.json" "${cluster_config_tmp_file}" + + tls_args="" + if [[ "${tls_enabled}" == "true" ]]; then + tls_args="--tls --tlsCAFile /mongodb-automation/tls/ca/ca-pem" + fi + cmd="${mongosh_path} --host {{.PodFQDN}} --port ${mongod_port} --eval 'JSON.stringify(rs.config())' --quiet" + exec_cmd "{{.Namespace}}" "{{.PodName}}" "${mongosh_container_name}" "${cmd}" | grep -v "Warning: Could not access" >"${rs_config_file}.tmp" + mv "${rs_config_file}.tmp" "${rs_config_file}" + + cmd="${mongosh_path} --host {{.PodFQDN}} --port ${mongod_port} --eval 'JSON.stringify(db.hello())' --quiet" + exec_cmd "{{.Namespace}}" "{{.PodName}}" "${mongosh_container_name}" "${cmd}" | grep -v "Warning: Could not access" >"${rs_hello_file}.tmp" + mv "${rs_hello_file}.tmp" "${rs_hello_file}" + + cp /data/logs/agent-health-status.json /data/logs/mdb-debug/health/health.json.tmp + mv /data/logs/mdb-debug/health/health.json.tmp /data/logs/mdb-debug/health/health.json + + kubectl_get_state_json "{{.Namespace}}" "{{.ResourceName}}" "${state_file}" + + sleep 1 +done diff --git a/tools/mdbdebug/cmd/mdbdebug/templates/replicaset_tmux_session.yaml.tpl b/tools/mdbdebug/cmd/mdbdebug/templates/replicaset_tmux_session.yaml.tpl new file mode 100644 index 000000000..243144e68 --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/templates/replicaset_tmux_session.yaml.tpl @@ -0,0 +1,81 @@ +session_name: mdb-debug +global_options: + history-limit: 50000 + mouse: on + pane-border-status: top + pane-border-format: "#{pane_index}: #{pane_title}" +window_options: + remain-on-exit: on +shell_command_before: + - | + function retry_cmd() { + local cmd="$1" + local delay="${2:-3}" + + while true; do + eval "$cmd" + echo "Retrying..." 
+ sleep "$delay" + done + } +windows: + - window_name: json + layout: tiled + panes: + - shell_command: + - tmux select-pane -t 0.0 -T '{{.ShortName}} (mdb)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/mdb/mdb.json --destDir /data/logs/mdb-debug/mdb -C=5 | tee -a /data/logs/mdb-debug/mdb/mdb.log" + - shell_command: + - tmux select-pane -t 0.1 -T '{{.ShortName}} (sts)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/sts/sts.json --destDir /data/logs/mdb-debug/sts -C=5 | tee -a /data/logs/mdb-debug/sts/sts.log" + - shell_command: + - tmux select-pane -t 0.2 -T '{{.ShortName}} (pod)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/pod/pod.json --destDir /data/logs/mdb-debug/pod -C=5 | tee -a /data/logs/mdb-debug/pod/pod.log" + - shell_command: + - tmux select-pane -t 0.3 -T '{{.ShortName}} (ac)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/ac/cluster-config.json --destDir /data/logs/mdb-debug/ac -C=5 | tee -a /data/logs/mdb-debug/ac/ac.log" + - shell_command: + - tmux select-pane -t 0.4 -T '{{.ShortName}} (ac tmp)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/ac_tmp/cluster-config.json --destDir /data/logs/mdb-debug/ac_tmp -C=5 | tee -a /data/logs/mdb-debug/ac_tmp/ac_tmp.log" + - shell_command: + - tmux select-pane -t 0.5 -T 'readiness.log' + - retry_cmd "less --follow-name -R +F /data/logs/mdb-debug/readiness/readiness.log.json | tee -a /data/logs/mdb-debug/readiness/terminal_output.log" + - shell_command: + - tmux select-pane -t 0.6 -T '{{.ShortName}} (rs config)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/rs/config.json --destDir /data/logs/mdb-debug/rs/ -C=5 | tee -a /data/logs/mdb-debug/rs/rs_config.log" + - shell_command: + - tmux select-pane -t 0.7 -T '{{.ShortName}} (rs hello)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/rs_hello/hello.json --destDir /data/logs/mdb-debug/rs_hello/ -C=5 --ignore connectionId --ignore '\\d{19}' --ignore '\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}' | tee -a /data/logs/mdb-debug/rs_hello/rs_hello.log" + - shell_command: + - tmux select-pane -t 0.8 -T '{{.ShortName}} (mongod cfg)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/mongod_config/config.json --destDir /data/logs/mdb-debug/mongod_config/ -C=5 | tee -a /data/logs/mdb-debug/mongod_config/mongod_config.log" + - shell_command: + - tmux select-pane -t 0.9 -T '{{.ShortName}} (health)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/health/health.json --destDir /data/logs/mdb-debug/health -C=5 --ignore LastMongoUpTime | tee -a /data/logs/mdb-debug/health/health.log" + - shell_command: + - tmux select-pane -t 0.10 -T '{{.ShortName}} (state)' + - retry_cmd "diffwatch --file /data/logs/mdb-debug/state/state.json --destDir /data/logs/mdb-debug/state | tee -a /data/logs/mdb-debug/state/state.log" + - window_name: logs{{.PodIdx}} + layout: tiled + panes: + - shell_command: + - tmux select-pane -t 1.0 -T 'pod log' + - retry_cmd "less --follow-name +F /data/logs/mdb-debug/logs/pod.log" + - shell_command: + - tmux select-pane -t 1.1 -T 'automation-agent-verbose.log' + - retry_cmd "less --follow-name +F /data/logs/automation-agent-verbose.log" + - shell_command: + - tmux select-pane -t 1.2 -T 'automation-agent-stderr.log' + - retry_cmd "less --follow-name +F /data/logs/automation-agent-stderr.log" + - shell_command: + - tmux select-pane -t 1.3 -T 'mongodb.log' + - retry_cmd "less --follow-name +F /data/logs/mongodb.log" + - shell_command: + - tmux select-pane -t 1.4 -T 'automation-agent.log' + - retry_cmd "less --follow-name +F 
/data/logs/automation-agent.log" + - shell_command: + - tmux select-pane -t 1.5 -T 'backup-agent.log' + - retry_cmd "less --follow-name +F /data/logs/backup-agent.log" + - shell_command: + - tmux select-pane -t 1.6 -T 'monitoring-agent.log' + - retry_cmd "less --follow-name +F /data/logs/monitoring-agent.log" diff --git a/tools/mdbdebug/cmd/mdbdebug/templates/retry_cmd.sh b/tools/mdbdebug/cmd/mdbdebug/templates/retry_cmd.sh new file mode 100644 index 000000000..7a2a05b0d --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/templates/retry_cmd.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Function to retry a command with a configurable delay +# Usage: retry_cmd "command to execute" delay_seconds +retry_cmd() { + local cmd="$1" + local delay="${2:-3}" + + while true; do + eval "$cmd" + echo "Retrying..." + sleep "$delay" + done +} \ No newline at end of file diff --git a/tools/mdbdebug/cmd/mdbdebug/templates_test.go b/tools/mdbdebug/cmd/mdbdebug/templates_test.go new file mode 100644 index 000000000..f8e390629 --- /dev/null +++ b/tools/mdbdebug/cmd/mdbdebug/templates_test.go @@ -0,0 +1,59 @@ +package main + +import ( + "fmt" + "github.com/mongodb/mongodb-kubernetes/pkg/util/env" + "github.com/stretchr/testify/assert" + "os" + "strings" + "testing" +) + +func TestTemplates(t *testing.T) { + templateData := TemplateData{ + Namespace: "ns", + ResourceName: "mdb", + ResourceType: "mdb", + StsName: "mdb-0", + PodName: "mdb-0-1", + PodIdx: 1, + ClusterIdx: 0, + ShortName: "m", + } + var str string + var err error + + templateFiles := []string{ + "appdb_entrypoint.sh.tpl", + "appdb_tmux_session.yaml.tpl", + "om_backup_daemon_entrypoint.sh.tpl", + "om_backup_daemon_tmux_session.yaml.tpl", + "om_entrypoint.sh.tpl", + "om_tmux_session.yaml.tpl", + "mongos_entrypoint.sh.tpl", + "mongos_entrypoint.sh.tpl", + "replicaset_entrypoint.sh.tpl", + "replicaset_tmux_session.yaml.tpl", + } + + projectDir := env.ReadOrDefault("PROJECT_DIR", "") + var generatedDir string + if projectDir != "" { + generatedDir = fmt.Sprintf("%s/.generated/tmp", projectDir) + if err := os.Mkdir(generatedDir, 0760); err != nil { + fmt.Printf("Failed to create dir %s: %s\n", generatedDir, err) + } + } + + for _, templateFile := range templateFiles { + str, err = renderTemplate(templateFile, templateData) + if generatedDir != "" { + filePath := fmt.Sprintf("%s/%s", generatedDir, strings.ReplaceAll(templateFile, ".tpl", "")) + if err := os.WriteFile(filePath, []byte(str), 0660); err != nil { + fmt.Printf("Failed to write file %s: %s", filePath, err) + } + } + assert.NoError(t, err) + assert.NotEmpty(t, str) + } +} diff --git a/tools/mdbdebug/watch.sh b/tools/mdbdebug/watch.sh new file mode 100755 index 000000000..97993b42a --- /dev/null +++ b/tools/mdbdebug/watch.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +# This script is for attaching to a previously created debugging pod. + +set -Eeou pipefail +set -x
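Every session template above names its session mdb-debug, so attaching to a previously created debug pod reduces to a kubectl exec plus a tmux attach. A minimal sketch of what such an attach could look like, assuming hypothetical namespace and pod-name arguments (they are illustrative and not part of this diff):

  namespace="$1"
  debug_pod="$2"
  # -it allocates a TTY so the tmux UI renders; mdb-debug is the session_name from the templates
  kubectl exec -it -n "${namespace}" "${debug_pod}" -- tmux attach -t mdb-debug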