openvmm/.github/workflows/code-review-metrics.yml at main · microsoft/openvmm · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
---
# Workflow that produces a code-review activity report for this repository.
name: Code Review Metrics

on:
  # Manual run; the analysis window can be overridden.
  workflow_dispatch:
    inputs:
      days:
        type: string
        default: '30'
        required: false
        description: 'Analysis period in days'
  # Scheduled run: 00:00 UTC every Monday.
  schedule:
    - cron: '0 0 * * 1'

# The workflow token is granted read access only.
permissions:
  issues: read
  pull-requests: read
  contents: read

jobs:
  # The workflow's only job; every step below runs on the same runner.
  review-metrics:
    name: Generate Code Review Metrics
    runs-on: ubuntu-latest

    steps:
      # Check out the repository into the runner workspace.
      - name: Checkout
        uses: actions/checkout@v6

      # Turn the analysis window (in days) into a YYYY-MM-DD start date and
      # publish it as the `start_date` output of this step.
      - name: Calculate Date Range
        id: date-range
        env:
          # The input reaches the script through the environment rather than
          # being expanded into the script text, so its value can never be
          # parsed as shell code.
          DAYS: ${{ github.event.inputs.days || '30' }}
        run: |
          # Accept only a whole number of days; anything else would otherwise
          # be handed to `date -d` as free-form text.
          if ! [[ "$DAYS" =~ ^[0-9]+$ ]]; then
            echo "::error::'days' must be a non-negative integer, got '$DAYS'"
            exit 1
          fi

          # -u: compute the date in UTC regardless of the runner's time zone.
          start_date=$(date -u -d "$DAYS days ago" +%Y-%m-%d)
          echo "start_date=$start_date" >> "$GITHUB_OUTPUT"

      # Fetch recent pull requests together with their reviews and aggregate
      # them into review-data/metrics.json (per-reviewer and per-author counts).
      - name: Collect Code Review Metrics
        env:
          # The GitHub CLI reads its credentials from GH_TOKEN, so the token is
          # never expanded into the script text or piped to `gh auth login`.
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          START_DATE: ${{ steps.date-range.outputs.start_date }}
        run: |
          echo "Collecting review data for PRs created since: $START_DATE"

          # Create output files
          mkdir -p review-data

          # List up to 1000 PRs in any state and keep those whose createdAt
          # value compares at or after the start date (string comparison in
          # jq). The Python below reads the result as one JSON object per line.
          gh pr list \
            --repo "$REPO" \
            --state all \
            --limit 1000 \
            --json number,title,author,createdAt,mergedAt,reviews,reviewRequests \
            --jq ".[] | select(.createdAt >= \"$START_DATE\")" \
            > review-data/prs.json

          # Process review data to generate metrics focused on who is reviewing and review counts
          python3 << 'EOF'
          import json
          import sys
          from collections import defaultdict

          # Stand-in login for a PR or review whose author login is missing or empty.
          UNKNOWN_LOGIN = 'ghost'


          def login_of(item):
            """Return item['author']['login'], or UNKNOWN_LOGIN when it is missing or empty."""
            return ((item.get('author') or {}).get('login')) or UNKNOWN_LOGIN


          # Load PR data from JSONL file (one JSON object per line)
          prs = []
          with open('review-data/prs.json', 'r') as f:
            for line_num, line in enumerate(f, 1):
              if not line.strip():
                continue
              try:
                prs.append(json.loads(line))
              except json.JSONDecodeError as e:
                print(f"Warning: Skipping malformed JSON on line {line_num}: {e}", file=sys.stderr)

          print(f"Processing {len(prs)} PRs...")

          # Per-reviewer totals; prs_reviewed is a set while counting so that several
          # reviews on the same PR count once there.
          reviewer_stats = defaultdict(lambda: {
            'reviews_given': 0,
            'prs_reviewed': set()
          })

          # Per-author totals.
          contributor_stats = defaultdict(lambda: {
            'prs_authored': 0
          })

          total_reviews = 0

          # Process each PR to count reviews per reviewer and track contributors
          for pr in prs:
            pr_number = pr['number']
            author = login_of(pr)

            # Track PR authors (contributors)
            contributor_stats[author]['prs_authored'] += 1

            # `or []` also covers an explicit null in the data.
            for review in pr.get('reviews') or []:
              reviewer = login_of(review)

              # An author's own replies in review threads show up as reviews on
              # their PR. They are not reviews of someone else's code, and counting
              # them would keep the author out of the "not reviewing" list.
              if reviewer == author and reviewer != UNKNOWN_LOGIN:
                continue

              total_reviews += 1
              reviewer_stats[reviewer]['reviews_given'] += 1
              reviewer_stats[reviewer]['prs_reviewed'].add(pr_number)

          # Convert sets to counts for JSON serialization
          for stats in reviewer_stats.values():
            stats['prs_reviewed'] = len(stats['prs_reviewed'])

          # Find contributors who haven't done reviews (sorted for stable output)
          contributors_not_reviewing = sorted(set(contributor_stats) - set(reviewer_stats))

          # Save comprehensive metrics
          metrics = {
            'summary': {
              'total_prs_analyzed': len(prs),
              'total_reviews': total_reviews,
              'total_reviewers': len(reviewer_stats),
              'total_contributors': len(contributor_stats),
              'contributors_not_reviewing': len(contributors_not_reviewing)
            },
            'reviewer_stats': dict(reviewer_stats),
            'contributor_stats': dict(contributor_stats),
            'contributors_not_reviewing': contributors_not_reviewing
          }

          with open('review-data/metrics.json', 'w') as f:
            json.dump(metrics, f, indent=2)

          print("Review metrics generated successfully")
          print(f"Total reviewers: {len(reviewer_stats)}")
          print(f"Total reviews: {total_reviews}")
          print(f"Total contributors: {len(contributor_stats)}")
          print(f"Contributors not reviewing: {len(contributors_not_reviewing)}")
          EOF

      # Render review-data/metrics.json as a Markdown report at
      # .github/reports/code-review-metrics-<run number>.md.
      - name: Generate Report
        run: |
          mkdir -p .github/reports

          # Create Python script for simplified report generation
          cat > generate_report.py << 'PYTHON_SCRIPT'
          import json
          import os
          import sys
          from datetime import datetime, timezone

          try:
            # Load metrics
            with open('review-data/metrics.json', 'r') as f:
              metrics = json.load(f)

            summary = metrics['summary']
            reviewer_stats = metrics['reviewer_stats']
            contributor_stats = metrics['contributor_stats']
            contributors_not_reviewing = metrics['contributors_not_reviewing']

            # Sort reviewers by review count
            sorted_reviewers = sorted(reviewer_stats.items(), key=lambda x: x[1]['reviews_given'], reverse=True)

            # Sort non-reviewing contributors by PRs authored (highest first), then
            # by login, so the table order is the same on every run.
            sorted_non_reviewers = sorted(
              contributors_not_reviewing,
              key=lambda login: (-contributor_stats[login]['prs_authored'], login)
            )

            repo_name = os.environ.get('GITHUB_REPOSITORY', 'Unknown')
            analysis_days = os.environ.get('ANALYSIS_DAYS', '30')

            # The timestamp is labelled UTC, so take the current time in UTC
            # explicitly instead of the runner's local time.
            generated_at = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')

            # Generate comprehensive markdown report
            report_lines = [
              "# Code Review Metrics Report",
              "",
              f"**Repository:** {repo_name}",
              f"**Generated:** {generated_at}",
              f"**Period:** Last {analysis_days} days",
              "",
              "## Summary",
              "",
              f"- **Total PRs Analyzed:** {summary['total_prs_analyzed']}",
              f"- **Total Reviews Given:** {summary['total_reviews']}",
              f"- **Active Reviewers:** {summary['total_reviewers']}",
              f"- **Total Contributors:** {summary['total_contributors']}",
              f"- **Contributors Not Reviewing:** {summary['contributors_not_reviewing']}",
              "",
              "## Who Is Reviewing Code",
              "",
              "| Reviewer | Reviews Given | PRs Reviewed |",
              "|----------|---------------|--------------|"
            ]

            # Add all reviewers to table (focused on who and how many)
            for reviewer, stats in sorted_reviewers:
              report_lines.append(f"| {reviewer} | {stats['reviews_given']} | {stats['prs_reviewed']} |")

            # Add section for contributors who haven't done reviews
            report_lines.extend([
              "",
              "## Contributors Who Have Not Done Reviews",
              ""
            ])

            if sorted_non_reviewers:
              report_lines.extend([
                "| Contributor | PRs Authored |",
                "|-------------|--------------|"
              ])

              for contributor in sorted_non_reviewers:
                prs_authored = contributor_stats[contributor]['prs_authored']
                report_lines.append(f"| {contributor} | {prs_authored} |")
            else:
              report_lines.append("*All contributors are also participating in code reviews* ✅")

            # Add insights focused on reviewer activity; each ratio falls back to 0
            # when its denominator is 0.
            most_active = sorted_reviewers[0] if sorted_reviewers else ('N/A', {'reviews_given': 0})
            avg_reviews = summary['total_reviews'] / summary['total_reviewers'] if summary['total_reviewers'] > 0 else 0
            review_participation = (summary['total_reviewers'] / summary['total_contributors'] * 100) if summary['total_contributors'] > 0 else 0

            report_lines.extend([
              "",
              "## Key Insights",
              "",
              f"- **Most Active Reviewer:** {most_active[0]} ({most_active[1]['reviews_given']} reviews)",
              f"- **Average Reviews per Reviewer:** {avg_reviews:.1f} reviews",
              f"- **Review Participation Rate:** {review_participation:.1f}% of contributors are also reviewing",
              f"- **Review Distribution:** {summary['total_reviews']} total reviews across {summary['total_prs_analyzed']} PRs",
              "",
              "---",
              "*Report shows who is reviewing code, review volume per person, and contributors who could participate more in reviews*"
            ])

            # Save report
            report_content = "\n".join(report_lines)
            output_file = f'.github/reports/code-review-metrics-{os.environ.get("GITHUB_RUN_NUMBER", "test")}.md'
            with open(output_file, 'w') as f:
              f.write(report_content)

            print("Report generated successfully")
            print(f"Output file: {output_file}")

          except Exception as e:
            # Report the failure on stderr and fail the step.
            print(f"Error generating report: {e}", file=sys.stderr)
            sys.exit(1)
          PYTHON_SCRIPT

          # Run the report generation
          python3 generate_report.py
        env:
          # Values read by generate_report.py through os.environ.
          GITHUB_REPOSITORY: ${{ github.repository }}
          ANALYSIS_DAYS: ${{ github.event.inputs.days || '30' }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}

      # Publish the raw metrics JSON and the Markdown report as one artifact
      # named after the run number, retained for 90 days.
      - name: Upload Artifacts
        uses: actions/upload-artifact@v7
        with:
          retention-days: 90
          name: code-review-metrics-${{ github.run_number }}
          path: |
            review-data/metrics.json
            .github/reports/code-review-metrics-*.md

      # Append a short overview of the run and, when metrics.json exists, the
      # headline numbers to the job summary.
      - name: Job Summary
        env:
          # Passed through the environment so the input value is never expanded
          # into the script text.
          DAYS: ${{ github.event.inputs.days || '30' }}
        run: |
          # One grouped redirect instead of reopening the summary file per line.
          {
            echo "# Code Review Metrics Generated 📊"
            echo "Period: ${DAYS} days"
            echo "Focus: Who is reviewing code and review volume per reviewer"
            echo "Report artifacts uploaded with 90-day retention"
          } >> "$GITHUB_STEP_SUMMARY"

          # Add summary stats to GitHub Actions summary
          if [ -f review-data/metrics.json ]; then
            python3 << 'EOF'
          import json
          import os

          with open('review-data/metrics.json', 'r') as f:
            metrics = json.load(f)

          summary = metrics['summary']
          reviewer_stats = metrics['reviewer_stats']

          # Find most active reviewer; fall back to a placeholder when nobody reviewed
          if reviewer_stats:
            top_reviewer_name, top_reviewer_stats = max(
              reviewer_stats.items(), key=lambda x: x[1]['reviews_given']
            )
          else:
            top_reviewer_name, top_reviewer_stats = 'N/A', {'reviews_given': 0}

          # Append, so the lines written by the shell above are kept.
          with open(os.environ['GITHUB_STEP_SUMMARY'], 'a') as f:
            f.write("\n## Key Metrics\n")
            f.write(f"- **Active Reviewers:** {summary['total_reviewers']}\n")
            f.write(f"- **Total Reviews:** {summary['total_reviews']}\n")
            f.write(f"- **PRs Analyzed:** {summary['total_prs_analyzed']}\n")
            f.write(f"- **Total Contributors:** {summary['total_contributors']}\n")
            f.write(f"- **Contributors Not Reviewing:** {summary['contributors_not_reviewing']}\n")
            f.write(f"- **Most Active Reviewer:** {top_reviewer_name} ({top_reviewer_stats['reviews_given']} reviews)\n")
          EOF
          fi