Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 42 additions & 20 deletions .github/workflows/compute_involvement.yml
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@

name: compute involvement degree

# 触发条件:每次 push 代码时运行
on: [repository_dispatch]
# 修正后的触发器:支持手动输入时间、自动推送到 main、以及远程触发
on:
push:
branches:
- main
repository_dispatch:
types: [opened, deleted, custom-event]
workflow_dispatch:
inputs:
since:
description: '统计开始时间 (例如: 2026-03-05 12:00)'
required: false
until:
description: '统计结束时间 (例如: 2026-03-07 12:00)'
required: false

jobs:
compute:
# 使用矩阵策略,同时在 ubuntu 和 windows 上运行
strategy:
matrix:
os: [ubuntu-latest] # 添加 windows-latest

# 运行环境
runs-on: ${{ matrix.os }}
runs-on: ubuntu-latest

steps:
# 1. 拉取完整 Git 历史(必须保留,否则无法统计所有提交和代码行数)
- uses: actions/checkout@v4
# 1. 拉取完整 Git 历史(逻辑不变:SHA 追溯必须获取所有 commit)
- name: Checkout Code
uses: actions/checkout@v4
with:
fetch-depth: 0

Expand All @@ -25,13 +32,28 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: '3.x'
architecture: 'x64'

# 3. 安装依赖
# 在 Windows 上 pip 命令相同,但 GitPython 在 Windows 下也能正常工作
# 3. 核心改动:安装 requests 库,因为新脚本需要调用 GitHub API
- name: Install Python dependencies
run: python -m pip install --upgrade pip requests gitpython

# 4. 执行你的贡献统计脚本
- name: Display involvement degree (${{ matrix.os }})
run: python contribution_analysis.py -t ${{ secrets.GITHUB_TOKEN }}
run: |
python -m pip install --upgrade pip
pip install requests
# 4. 执行新的统计脚本
# 注意:增加了 -r 参数传递当前仓库名,并匹配了 workflow_dispatch 的输入
- name: Execute SHA-based Analysis
run: |
python contribution_analysis.py \
-t ${{ secrets.GITHUB_TOKEN }} \
-r ${{ github.repository }} \
--ignore "ignore_users.json" \
--since "${{ github.event.inputs.since }}" \
--until "${{ github.event.inputs.until }}" \
--output "contribution_report.csv"
# 5. 自动上传产物(确保你能在 Actions 页面直接下载 CSV)
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
name: commit-stats-report
path: contribution_report.csv
211 changes: 80 additions & 131 deletions contribution_analysis.py
Original file line number Diff line number Diff line change
@@ -1,131 +1,80 @@
import requests # 用于发起网络请求
import argparse # 用于解析命令行参数
import os # 用于操作系统级别的操作
from collections import defaultdict, Counter # 用于数据结构操作
import git # 用于操作Git库

# Command-line interface: the only input is a GitHub personal access token,
# passed as -t/--token and used to authenticate all API requests below.
argparser = argparse.ArgumentParser(description='Involvement Degree')
argparser.add_argument('-t', '--token', help='your personal github access token')
args = argparser.parse_args()

# Request headers: token-based auth plus the GitHub v3 REST API media type.
TOKEN = args.token
headers = {
'Authorization': f'token {TOKEN}',
'Accept': 'application/vnd.github.v3+json'
}

# Target repository (hard-coded to OpenHUTB/nn).
owner = 'OpenHUTB' # repository owner
repo = 'nn' # repository name

#########################################
####### 统计代码添加和删除行数 ########
#########################################
def commit_info():
    """Print per-author commit counts and added/deleted line totals.

    Reads the local Git history (via GitPython) of the repository in the
    current working directory. Author names are normalized through a small
    alias map so the same person is not counted under two spellings.
    """
    from git.repo import Repo

    # Open the repository rooted at the current working directory.
    local_path = os.path.join('.')
    repo = Repo(local_path)

    # One line per commit: the author name only.
    authors = repo.git.log('--pretty=format:%an').splitlines()

    # Map known alternate spellings to one canonical login.
    alias_map = {
        '刘子民': 'ziminliu',
    }
    normalized_authors = [alias_map.get(author, author) for author in authors]

    # Commit-count tally, most active authors first.
    author_counts = Counter(normalized_authors)
    print("提交次数:")
    for author, count in author_counts.most_common():
        print(f"{author}: {count} 次提交")

    # Per-commit output: the author line, a blank separator, then one
    # numstat line per file ("added<TAB>deleted<TAB>path").
    log_data = repo.git.log('--pretty=format:%an', '--numstat')

    author_stats = defaultdict(lambda: {'added': 0, 'deleted': 0})
    current_author = None
    for line in log_data.splitlines():
        if not line.strip():
            # Blank separator between a commit header and its numstat
            # block. Skip it: the original code assigned it to
            # current_author, attributing all line stats to "".
            continue
        if '\t' not in line:
            # Header line: the commit author's name (apply the same
            # aliasing as the commit-count pass for consistency).
            current_author = alias_map.get(line.strip(), line.strip())
        else:
            # maxsplit=2 keeps file paths containing tabs intact.
            added, deleted, _ = line.split('\t', 2)
            if added != '-':  # '-' marks binary files with no line counts
                author_stats[current_author]['added'] += int(added)
            if deleted != '-':
                author_stats[current_author]['deleted'] += int(deleted)

    for author, stats in author_stats.items():
        print(f"{author}: 添加 {stats['added']} 行, 删除 {stats['deleted']} 行")

commit_info()

#########################################
##### Count user issues and comments ####
#########################################
issue_counts = {}
comment_counts = {}

page = 1
while True:
    # Page through every issue (state=all includes closed ones). The
    # issues endpoint also returns pull requests; those are skipped below.
    url = f'https://api.github.com/repos/{owner}/{repo}/issues?state=all&per_page=100&page={page}'
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("请求失败,请检查网络连接或GitHub令牌。")
        break

    issues = response.json()
    if not issues:
        # An empty page means we are past the last page of results.
        break

    for issue in issues:
        if 'pull_request' in issue:
            # Pull requests appear in the issues API; ignore them here.
            continue

        user = issue['user']['login']
        issue_counts[user] = issue_counts.get(user, 0) + 1

        # Fetch this issue's comments. per_page=100 lifts the default
        # 30-comment cap; a failed fetch is skipped instead of crashing
        # (the original iterated the error payload, raising TypeError
        # on comment['user']).
        comments_response = requests.get(issue['comments_url'], headers=headers, params={'per_page': 100})
        if comments_response.status_code != 200:
            continue
        for comment in comments_response.json():
            commenter = comment['user']['login']
            comment_counts[commenter] = comment_counts.get(commenter, 0) + 1

    page += 1

# Print both tallies sorted by count, most active users first.
sorted_issue_counts = dict(sorted(issue_counts.items(), key=lambda item: item[1], reverse=True))
sorted_comment_counts = dict(sorted(comment_counts.items(), key=lambda item: item[1], reverse=True))
print("提问次数:")
for user, count in sorted_issue_counts.items():
    print(f"{user}: {count}")

print("\n回答次数:")
for user, count in sorted_comment_counts.items():
    print(f"{user}: {count}")
import subprocess
import argparse
import csv
import json
import os
import requests
from collections import Counter

def get_login_by_sha(sha, repo, token, cache):
    """Resolve a commit SHA to the author's GitHub login via the REST API.

    Every outcome — success, commit with no linked GitHub account, HTTP
    error, or network failure — is memoized in *cache*, so each SHA costs
    at most one API request. (The original cached successes only, so
    failed SHAs would be re-queried on every call.)

    Args:
        sha: Full commit SHA to look up.
        repo: "owner/name" repository slug.
        token: GitHub personal access token used for authentication.
        cache: Dict mapping SHA -> login (or None), shared across calls.

    Returns:
        The GitHub login string, or None when no login could be resolved.
    """
    if sha in cache:
        return cache[sha]

    url = f"https://api.github.com/repos/{repo}/commits/{sha}"
    headers = {"Authorization": f"token {token}"}
    login = None
    try:
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            # "author" is null when the commit email is not linked to
            # any GitHub account; guard before reading "login".
            author_obj = response.json().get("author")
            if author_obj:
                login = author_obj.get("login")
    except Exception as e:  # network errors, bad JSON — treat as "unknown"
        print(f"SHA查询异常({sha}): {e}")
    cache[sha] = login
    return login

def load_ignore_users(file_path):
    """Load the ignore list from a JSON file of usernames.

    Args:
        file_path: Path to a JSON file containing a list of names.

    Returns:
        A set of lowercased, whitespace-stripped names; an empty set when
        the file is missing, unreadable, or not valid JSON (best-effort,
        never raises for those cases).
    """
    if not os.path.exists(file_path):
        return set()
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return {str(u).strip().lower() for u in json.load(f)}
    except (OSError, ValueError, TypeError):
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # propagate; malformed JSON or I/O errors still mean "ignore nobody".
        return set()

def run_analysis():
    """Entry point: count commits per GitHub login for the local repository.

    Lists commit SHAs with `git log` (optionally bounded by --since/--until),
    resolves each SHA to a GitHub login through the REST API, filters out
    users from the ignore list, and writes a CSV sorted by commit count.
    Requires a full-history clone (checkout with fetch-depth: 0).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--token", required=True)
    parser.add_argument("-r", "--repo", required=True)
    parser.add_argument("--since")
    parser.add_argument("--until")
    parser.add_argument("--ignore", default="ignore_users.json")
    parser.add_argument("--output", default="commit_stats.csv")
    args = parser.parse_args()

    ignore_set = load_ignore_users(args.ignore)

    # Build the git command. Empty --since/--until values (as passed by a
    # workflow_dispatch run without inputs) are falsy and skipped.
    cmd = ["git", "log", "--pretty=%H"]
    if args.since:
        cmd.append(f"--since={args.since}")
    if args.until:
        cmd.append(f"--until={args.until}")

    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0:
        # Surface the failure in the Actions log instead of exiting
        # silently (the original bare `return` hid git errors entirely).
        print(f"git log 失败: {result.stderr.strip()}")
        return

    shas = [s.strip() for s in result.stdout.splitlines() if s.strip()]
    login_counts = Counter()
    sha_to_login_cache = {}

    print(f"检测到 {len(shas)} 个提交,正在追溯归属...")

    for sha in shas:
        login = get_login_by_sha(sha, args.repo, args.token, sha_to_login_cache)
        if login and login.lower() not in ignore_set:
            login_counts[login] += 1

    # Export as CSV, sorted by commit count, descending.
    sorted_stats = sorted(login_counts.items(), key=lambda x: x[1], reverse=True)
    with open(args.output, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["GitHub_Login", "Commits"])
        writer.writerows(sorted_stats)
    print(f"分析完成,导出至 {args.output}")

if __name__ == "__main__":
    run_analysis()
5 changes: 5 additions & 0 deletions ignore_users.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[
"Haidong Wang",
"donghaiwang",
"whd@hutb.edu.cn"
]