# .github/workflows/memory-soak.yml (shaun0927/opensafari, branch main)
name: Memory Soak Test

# Long-session memory SLO regression guard, tracked in issue #554.
# Every night a self-hosted macOS runner with a booted iOS simulator executes
# the 60-minute round-robin soak test. When that run fails, the sentinel job
# comments on the tracking issue; once failures reach 7 in a row it also opens
# a labelled regression issue.

on:
  # Manual trigger; takes no inputs.
  workflow_dispatch: {}
  schedule:
    # Every day at 03:00 UTC.
    - cron: '0 3 * * *'

permissions:
  # The sentinel job creates issue comments and issues with the workflow token.
  issues: write
  # Read-only repository access, enough for actions/checkout.
  contents: read

jobs:
  # Builds the project, boots (or reuses) an iOS simulator, runs the Jest soak
  # suite under tests/soak/, and archives the log plus tests/soak/output/.
  soak:
    runs-on: [self-hosted, macOS]
    # Job-level ceiling; the soak step itself is capped at 75 minutes below.
    timeout-minutes: 90
    env:
      # Quoted so the value stays a string. Presumably the opt-in switch the
      # soak suite checks before running — confirm in tests/soak/.
      OPENSAFARI_RUN_SOAK: '1'

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'

      # Placed after setup-node so the node/npm versions logged here are the
      # ones the following steps use, and so the step cannot fail just because
      # the runner has no system-wide Node on PATH.
      - name: Print runner info
        run: |
          sw_vers
          xcodebuild -version
          echo "Xcode select: $(xcode-select -p)"
          node --version
          npm --version

      - name: Install dependencies
        run: npm ci

      - name: Build
        run: npm run build

      # Resolves a simulator UDID — an already-booted device, else an available
      # device named DEVICE_NAME, else a newly created one — waits for it to
      # boot, and exports it as DEVICE_UDID for later steps.
      - name: Boot simulator
        run: |
          DEVICE_NAME="iPhone 16"

          # Reuse an already-booted device if one exists.
          # Any failure in the pipeline is treated as "none booted".
          BOOTED=$(xcrun simctl list devices booted -j \
            | python3 -c "
          import json,sys
          data=json.load(sys.stdin)
          udids=[d['udid'] for devs in data['devices'].values() for d in devs if d.get('state')=='Booted']
          print(udids[0] if udids else '')
          " 2>/dev/null || echo "")

          if [ -n "$BOOTED" ]; then
            DEVICE="$BOOTED"
            echo "Reusing booted simulator: $DEVICE"
          else
            # Find an available device matching the target name.
            # ${DEVICE_NAME} is expanded by the shell into the Python source.
            DEVICE=$(xcrun simctl list devices available -j \
              | python3 -c "
          import json,sys
          data=json.load(sys.stdin)
          udids=[d['udid'] for devs in data['devices'].values() for d in devs if d.get('name')=='${DEVICE_NAME}']
          print(udids[0] if udids else '')
          " 2>/dev/null || echo "")

            if [ -z "$DEVICE" ]; then
              echo "No '${DEVICE_NAME}' device found; creating one."
              DEVICE=$(xcrun simctl create "SoakTest" "${DEVICE_NAME}" 2>/dev/null || echo "")
            fi

            if [ -z "$DEVICE" ]; then
              echo "ERROR: Could not find or create simulator." >&2
              exit 1
            fi

            # Boot errors are ignored here; bootstatus below is what gates
            # the step on the device actually being up.
            xcrun simctl boot "$DEVICE" 2>/dev/null || true
            xcrun simctl bootstatus "$DEVICE" -b
          fi

          # Make the UDID visible to subsequent steps of this job.
          echo "DEVICE_UDID=$DEVICE" >> "$GITHUB_ENV"
          xcrun simctl list devices booted

      - name: Run soak test
        # testPathIgnorePatterns in jest.config.js excludes tests/soak/;
        # override with --testPathIgnorePatterns=/node_modules/ so the test
        # actually executes here (same pattern as sim-hid-sentinel workflow).
        #
        # `shell: bash` is explicit on purpose. The default run shell is
        # `bash -e {0}` without pipefail, so the exit status of
        # `jest | tee` would be tee's and a failing soak test would pass the
        # step. Naming the shell makes GitHub launch it with `-eo pipefail`.
        shell: bash
        run: |
          npx jest tests/soak/ \
            --testPathIgnorePatterns=/node_modules/ \
            --testTimeout=3700000 \
            --forceExit \
            2>&1 | tee soak.log
        timeout-minutes: 75

      # Runs even when earlier steps failed so failing runs can be triaged.
      - name: Upload RSS baseline and logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: rss-baseline-${{ github.run_number }}
          path: |
            soak.log
            tests/soak/output/
          retention-days: 90
          if-no-files-found: warn

      # Best-effort cleanup: DEVICE_UDID comes from the Boot simulator step and
      # may be unset if that step failed, hence the ignored exit status.
      - name: Shutdown simulator
        if: always()
        run: xcrun simctl shutdown "$DEVICE_UDID" 2>/dev/null || true

  # Failure follow-up. Runs only when the soak job it depends on has failed:
  # always comments on tracking issue #554 and, once the failure streak
  # reaches the threshold, opens a labelled regression issue (at most one
  # open at a time).
  sentinel:
    needs: soak
    if: failure()
    runs-on: [self-hosted, macOS]

    steps:
      # NOTE(review): the script below does not read the working tree, so this
      # checkout looks removable — confirm before dropping it.
      - uses: actions/checkout@v4

      - name: Check consecutive failures and open issue
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            // Number of back-to-back failures that triggers a regression issue.
            const FAILURE_THRESHOLD = 7;

            // Fetch the most recent completed runs of this workflow.
            const runs = await github.rest.actions.listWorkflowRuns({
              owner: context.repo.owner,
              repo: context.repo.repo,
              workflow_id: 'memory-soak.yml',
              per_page: FAILURE_THRESHOLD,
              status: 'completed',
            });

            // The run executing this script is still in progress, so the
            // `completed` filter above never returns it. This job only runs
            // after a soak failure, so start the streak at 1 for the current
            // run and extend it with the immediately preceding failures.
            let consecutiveFailures = 1;
            for (const r of runs.data.workflow_runs) {
              // Defensive: never count the current run twice.
              if (r.id === context.runId) continue;
              if (r.conclusion !== 'failure') break;
              consecutiveFailures++;
              if (consecutiveFailures >= FAILURE_THRESHOLD) break;
            }

            if (consecutiveFailures >= FAILURE_THRESHOLD) {
              // Open a labelled regression issue if none is already open.
              const existing = await github.rest.issues.listForRepo({
                owner: context.repo.owner,
                repo: context.repo.repo,
                labels: 'memory-regression',
                state: 'open',
              });

              // The issues endpoint also returns pull requests; a labelled PR
              // must not suppress the regression issue.
              const openIssues = existing.data.filter((i) => !i.pull_request);

              if (openIssues.length === 0) {
                await github.rest.issues.create({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  title: 'Memory regression detected — 7 consecutive soak test failures',
                  body: [
                    'The nightly memory soak test has failed 7 times in a row.',
                    '',
                    `See: ${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/workflows/memory-soak.yml`,
                    '',
                    '## Triage checklist',
                    '',
                    '- Download the `rss-baseline-*` artifact from the latest failing run.',
                    '- Identify which backend tier correlates with the RSS spike (Tier 0 Flutter VM, Tier 1 SimHID, Tier 1.5 AX press, Tier 3 WebKit).',
                    '- Check `tests/soak/README.md` for interpretation guidance.',
                    '- Re-run locally with `OPENSAFARI_RUN_SOAK=1 npx jest tests/soak/ --testTimeout=3700000`.',
                    '',
                    'Ref: #554',
                  ].join('\n'),
                  labels: ['memory-regression', 'reliability'],
                });
              }
            }

            // Always post a failure comment on the tracking issue.
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: 554,
              body: [
                `Memory soak test failed — run #${context.runNumber}`,
                '',
                `See: ${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`,
                '',
                `Consecutive failures in last 7 runs: ${consecutiveFailures}`,
              ].join('\n'),
            });