Skip to content

Commit

Permalink
refactor realcluster testing
Browse files Browse the repository at this point in the history
Signed-off-by: Ryan Leung <[email protected]>
  • Loading branch information
rleungx committed Feb 19, 2025
1 parent ec2de11 commit 2a2c0df
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 63 deletions.
91 changes: 66 additions & 25 deletions tests/integrations/realcluster/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,44 +28,85 @@ tidy:
git diff go.mod go.sum | cat
git diff --quiet go.mod go.sum

.PHONY: check
check: tiup test

.PHONY: tiup
tiup:
# if tiup binary not exist, download it
if ! which tiup > /dev/null 2>&1; then \
curl --proto '=https' --tlsv1.2 -sSf https://tiup-mirrors.pingcap.com/install.sh | sh; \
fi
@echo "==> Checking tiup installation"
@if ! which tiup > /dev/null 2>&1; then \
echo "Installing tiup..."; \
curl --proto '=https' --tlsv1.2 -sSf https://tiup-mirrors.pingcap.com/install.sh | sh; \
else \
echo "tiup already installed"; \
fi
@echo "tiup version: $$(tiup --version)"

.PHONY: deploy
deploy: kill_cluster deploy_only

.PHONY: deploy_only
deploy_only:
@ echo "deploying..."
./deploy.sh
@ echo "wait cluster ready..."
./wait_tiup.sh 15 20
@ echo "check cluster status..."
@ pid=$$(ps -ef | grep 'playground' | grep -v grep | awk '{print $$2}' | head -n 1); \
echo $$pid;
@echo "==> Deploying cluster..."
@./deploy.sh
@echo "==> Waiting for cluster to be ready..."
@./wait_tiup.sh 15 20
@echo "==> Checking cluster status..."
@if pid=$$(pgrep -f "tiup.*playground.*--tag"); then \
echo "Playground process running with pid: $$pid"; \
tiup playground display; \
else \
echo "ERROR: Playground process not found"; \
exit 1; \
fi

.PHONY: kill_cluster
kill_cluster:
@ echo "kill cluster..."
@ pid=$$(ps -ef | grep 'playground' | grep -v grep | awk '{print $$2}' | head -n 1); \
if [ ! -z "$$pid" ]; then \
echo $$pid; \
@echo "==> Killing cluster..."
@if pid=$$(pgrep -f "tiup.*playground.*--tag"); then \
echo "Found playground process(es):"; \
ps -fp $$pid; \
echo "Sending SIGTERM..."; \
kill $$pid; \
echo "waiting for cluster to exit..."; \
sleep 30; \
echo "Waiting for process to exit (30s)..."; \
for i in $$(seq 1 30); do \
if ! kill -0 $$pid 2>/dev/null; then \
echo "Process exited"; \
exit 0; \
fi; \
sleep 1; \
done; \
echo "Process still running, sending SIGKILL..."; \
kill -9 $$pid || true; \
else \
echo "No playground process found"; \
fi

.PHONY: test
test:
CGO_ENABLED=1 go test ./... -v -tags deadlock -race -cover || (\
echo "follow is pd-0 log" ; \
cat ~/.tiup/data/pd_real_cluster_test/pd-0/pd.log ; \
echo "follow is pd-1 log" ; \
cat ~/.tiup/data/pd_real_cluster_test/pd-1/pd.log ; \
echo "follow is pd-2 log" ; \
cat ~/.tiup/data/pd_real_cluster_test/pd-2/pd.log ; \
exit 1)
@echo "==> Running integration tests..."
@CGO_ENABLED=1 go test ./... -v -tags deadlock -race -cover || (\
echo "==> Test failed. Collecting logs..."; \
$(MAKE) collect_logs; \
exit 1 \
)

.PHONY: collect_logs
collect_logs:
@echo "==> Collecting cluster logs..."
@for pd in 0 1 2; do \
echo "### PD-$$pd logs ###"; \
cat ~/.tiup/data/pd_real_cluster_test/pd-$$pd/pd.log 2>/dev/null || echo "No log file found"; \
echo; \
done
@echo "==> Collecting playground logs..."
@find /tmp/real_cluster/playground -type f -name "*.log" -exec sh -c 'echo "### $$(basename {}) ###"; cat {}; echo' \;

.PHONY: clean
clean:
@echo "==> Cleaning up..."
@rm -rf /tmp/real_cluster/playground/*
@echo "==> Cleaned playground logs"

install-tools:
cd $(ROOT_PATH) && $(MAKE) install-tools
56 changes: 27 additions & 29 deletions tests/integrations/realcluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,12 @@ package realcluster
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"syscall"
"testing"
"time"

"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"go.uber.org/zap"

Expand Down Expand Up @@ -68,13 +65,12 @@ func NewProcessManager(tag string) *ProcessManager {

// CollectPids will collect the pids of the processes.
func (pm *ProcessManager) CollectPids() error {
cmd := exec.Command("pgrep", "-f", pm.tag)
output, err := cmd.Output()
output, err := runCommandWithOutput(fmt.Sprintf("pgrep -f %s", pm.tag))
if err != nil {
return fmt.Errorf("failed to collect pids: %v", err)
}

for _, pidStr := range strings.Split(strings.TrimSpace(string(output)), "\n") {
for _, pidStr := range strings.Split(strings.TrimSpace(output), "\n") {
if pid, err := strconv.Atoi(pidStr); err == nil {
pm.pids = append(pm.pids, pid)
}
Expand Down Expand Up @@ -109,20 +105,17 @@ func isProcessRunning(pid int) bool {

// SetupSuite will run before the tests in the suite are run.
func (s *clusterSuite) SetupSuite() {
t := s.T()
re := s.Require()

// Clean the data dir. It is the default data dir of TiUP.
dataDir := filepath.Join(os.Getenv("HOME"), ".tiup", "data", "pd_real_cluster_test_"+s.suiteName+"_*")
matches, err := filepath.Glob(dataDir)
require.NoError(t, err)
re.NoError(err)

for _, match := range matches {
require.NoError(t, runCommand("rm", "-rf", match))
re.NoError(runCommand("rm", "-rf", match))
}
s.startCluster(t)
t.Cleanup(func() {
s.stopCluster()
})
s.startCluster()
}

// TearDownSuite will run after all the tests in the suite have been run.
Expand All @@ -134,10 +127,10 @@ func (s *clusterSuite) TearDownSuite() {
s.stopCluster()
}

func (s *clusterSuite) startCluster(t *testing.T) {
func (s *clusterSuite) startCluster() {
log.Info("start to deploy a cluster", zap.Bool("ms", s.ms))
s.deployTiupPlayground(t)
require.NoError(t, s.waitTiupReady())
s.deploy()
s.waitReady()
s.clusterCnt++
}

Expand Down Expand Up @@ -166,28 +159,29 @@ func (s *clusterSuite) restart() {
tag := s.tag()
log.Info("start to restart", zap.String("tag", tag))
s.stopCluster()
s.startCluster(s.T())
s.startCluster()
log.Info("TiUP restart success")
}

func (s *clusterSuite) deployTiupPlayground(t *testing.T) {
func (s *clusterSuite) deploy() {
re := s.Require()
curPath, err := os.Getwd()
require.NoError(t, err)
require.NoError(t, os.Chdir("../../.."))
re.NoError(err)
re.NoError(os.Chdir("../../.."))

if !fileExists("third_bin") || !fileExists("third_bin/tikv-server") || !fileExists("third_bin/tidb-server") || !fileExists("third_bin/tiflash") {
log.Info("downloading binaries...")
log.Info("this may take a few minutes, you can also download them manually and put them in the bin directory.")
require.NoError(t, runCommand("sh",
re.NoError(runCommand("sh",
"./tests/integrations/realcluster/download_integration_test_binaries.sh"))
}
if !fileExists("bin") || !fileExists("bin/pd-server") {
log.Info("complie pd binaries...")
require.NoError(t, runCommand("make", "pd-server"))
re.NoError(runCommand("make", "pd-server"))
}

if !fileExists(playgroundLogDir) {
require.NoError(t, os.MkdirAll(playgroundLogDir, 0755))
re.NoError(os.MkdirAll(playgroundLogDir, 0755))
}

// nolint:errcheck
Expand Down Expand Up @@ -221,7 +215,7 @@ func (s *clusterSuite) deployTiupPlayground(t *testing.T) {

// Avoid changing the dir before `tiup playground` is executed.
time.Sleep(10 * time.Second)
require.NoError(t, os.Chdir(curPath))
re.NoError(os.Chdir(curPath))
}

func buildBinPathsOpts(ms bool) string {
Expand All @@ -242,7 +236,8 @@ func buildBinPathsOpts(ms bool) string {
return strings.Join(opts, " ")
}

func (s *clusterSuite) waitTiupReady() error {
func (s *clusterSuite) waitReady() {
re := s.Require()
const (
interval = 5
maxTimes = 20
Expand All @@ -255,16 +250,19 @@ func (s *clusterSuite) waitTiupReady() error {
for i := 0; i < maxTimes; i++ {
select {
case <-timeout:
return fmt.Errorf("TiUP is not ready after timeout, tag: %s", s.tag())
re.FailNowf("TiUP is not ready after timeout, tag: %s", s.tag())
case <-ticker.C:
err := runCommand(tiupBin, "playground", "display", "--tag", s.tag())
log.Info("check TiUP ready", zap.String("tag", s.tag()))
cmd := fmt.Sprintf(`%s playground display --tag %s`, tiupBin, s.tag())
output, err := runCommandWithOutput(cmd)
if err == nil {
log.Info("TiUP is ready", zap.String("tag", s.tag()))
return nil
return
}
log.Info(output)
log.Info("TiUP is not ready, will retry", zap.Int("retry times", i),
zap.String("tag", s.tag()), zap.Error(err))
}
}
return fmt.Errorf("TiUP is not ready after max retries, tag: %s", s.tag())
re.FailNowf("TiUP is not ready after max retries, tag: %s", s.tag())
}
6 changes: 3 additions & 3 deletions tests/integrations/realcluster/cluster_id_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func (s *clusterIDSuite) TestClientClusterID() {
re := require.New(s.T())
ctx := context.Background()
// deploy second cluster
s.startCluster(s.T())
s.startCluster()
defer s.stopCluster()

pdEndpoints := getPDEndpoints(s.T())
Expand All @@ -57,10 +57,10 @@ func (s *clusterIDSuite) TestClientClusterID() {
}

func getPDEndpoints(t *testing.T) []string {
pdAddrsForEachTikv, err := runCommandWithOutput("ps -ef | grep tikv-server | awk -F '--pd-endpoints=' '{print $2}' | awk '{print $1}'")
output, err := runCommandWithOutput("ps -ef | grep tikv-server | awk -F '--pd-endpoints=' '{print $2}' | awk '{print $1}'")
require.NoError(t, err)
var pdAddrs []string
for _, addr := range pdAddrsForEachTikv {
for _, addr := range strings.Split(strings.TrimSpace(output), "\n") {
// if the length of addr is less than 5, it cannot be a valid address
if len(addr) < 5 {
continue
Expand Down
8 changes: 7 additions & 1 deletion tests/integrations/realcluster/etcd_key_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package realcluster
import (
"fmt"
"slices"
"strings"
"testing"

"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -130,7 +131,12 @@ func getEtcdKey(endpoints, prefix string) ([]string, error) {
// `sed 's/[0-9]*//g'` is used to remove the number in the etcd key, such as the cluster id.
etcdCmd := fmt.Sprintf("etcdctl --endpoints=%s get %s --prefix --keys-only | sed 's/[0-9]*//g' | sort | uniq",
endpoints, prefix)
return runCommandWithOutput(etcdCmd)
output, err := runCommandWithOutput(etcdCmd)
if err != nil {
return nil, err
}
return strings.Split(strings.TrimSpace(output), "\n"), nil

}

func checkEtcdKey(t *testing.T, keys, expectedKeys []string) bool {
Expand Down
8 changes: 3 additions & 5 deletions tests/integrations/realcluster/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ package realcluster
import (
"os"
"os/exec"
"strings"
"time"
)

Expand All @@ -41,14 +40,13 @@ func runCommand(name string, args ...string) error {
return cmd.Run()
}

func runCommandWithOutput(cmdStr string) ([]string, error) {
func runCommandWithOutput(cmdStr string) (string, error) {
cmd := exec.Command("sh", "-c", cmdStr)
bytes, err := cmd.Output()
if err != nil {
return nil, err
return "", err
}
output := strings.Split(string(bytes), "\n")
return output, nil
return string(bytes), nil
}

func fileExists(path string) bool {
Expand Down

0 comments on commit 2a2c0df

Please sign in to comment.