Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 42 additions & 20 deletions .github/workflows/compute_involvement.yml
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@

name: compute involvement degree

# 触发条件:每次 push 代码时运行
on: [repository_dispatch]
# 修正后的触发器:支持手动输入时间、自动推送到 main、以及远程触发
on:
push:
branches:
- main
repository_dispatch:
types: [opened, deleted, custom-event]
workflow_dispatch:
inputs:
since:
description: '统计开始时间 (例如: 2026-03-05 12:00)'
required: false
until:
description: '统计结束时间 (例如: 2026-03-07 12:00)'
required: false

jobs:
compute:
# 使用矩阵策略,同时在 ubuntu 和 windows 上运行
strategy:
matrix:
os: [ubuntu-latest] # 添加 windows-latest

# 运行环境
runs-on: ${{ matrix.os }}
runs-on: ubuntu-latest

steps:
# 1. 拉取完整 Git 历史(必须保留,否则无法统计所有提交和代码行数)
- uses: actions/checkout@v4
# 1. 拉取完整 Git 历史(逻辑不变:SHA 追溯必须获取所有 commit)
- name: Checkout Code
uses: actions/checkout@v4
with:
fetch-depth: 0

Expand All @@ -25,13 +32,28 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: '3.x'
architecture: 'x64'

# 3. 安装依赖
# 在 Windows 上 pip 命令相同,但 GitPython 在 Windows 下也能正常工作
# 3. 核心改动:安装 requests 库,因为新脚本需要调用 GitHub API
- name: Install Python dependencies
run: python -m pip install --upgrade pip requests gitpython

# 4. 执行你的贡献统计脚本
- name: Display involvement degree (${{ matrix.os }})
run: python contribution_analysis.py -t ${{ secrets.GITHUB_TOKEN }}
run: |
python -m pip install --upgrade pip
pip install requests
# 4. 执行新的统计脚本
# 注意:增加了 -r 参数传递当前仓库名,并匹配了 workflow_dispatch 的输入
- name: Execute SHA-based Analysis
run: |
python contribution_analysis.py \
-t ${{ secrets.GITHUB_TOKEN }} \
-r ${{ github.repository }} \
--ignore "ignore_users.json" \
--since "${{ github.event.inputs.since }}" \
--until "${{ github.event.inputs.until }}" \
--output "contribution_report.csv"
# 5. 自动上传产物(确保你能在 Actions 页面直接下载 CSV)
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
name: commit-stats-report
path: contribution_report.csv
211 changes: 80 additions & 131 deletions contribution_analysis.py
Original file line number Diff line number Diff line change
@@ -1,131 +1,80 @@
import requests # 用于发起网络请求
import argparse # 用于解析命令行参数
import os # 用于操作系统级别的操作
from collections import defaultdict, Counter # 用于数据结构操作
import git # 用于操作Git库

# Command-line interface: the only input is a GitHub personal access token,
# passed as -t/--token and used to authenticate all API requests below.
argparser = argparse.ArgumentParser(description='Involvement Degree')
argparser.add_argument('-t', '--token', help='your personal github access token')
args = argparser.parse_args()

# Request headers: token-based auth plus the GitHub v3 REST API media type.
TOKEN = args.token
headers = {
'Authorization': f'token {TOKEN}',
'Accept': 'application/vnd.github.v3+json'
}

# Target repository (hard-coded to OpenHUTB/nn).
owner = 'OpenHUTB' # repository owner
repo = 'nn' # repository name

#########################################
####### 统计代码添加和删除行数 ########
#########################################
def commit_info():
    """Print per-author commit counts and added/deleted line totals.

    Reads the local Git history (via GitPython) of the repository in the
    current working directory. Author names are normalized through a small
    alias map so the same person is not counted under two spellings.
    """
    from git.repo import Repo

    # Open the repository rooted at the current working directory.
    local_path = os.path.join('.')
    repo = Repo(local_path)

    # One line per commit: the author name only.
    authors = repo.git.log('--pretty=format:%an').splitlines()

    # Map known alternate spellings to one canonical login.
    alias_map = {
        '刘子民': 'ziminliu',
    }
    normalized_authors = [alias_map.get(author, author) for author in authors]

    # Commit-count tally, most active authors first.
    author_counts = Counter(normalized_authors)
    print("提交次数:")
    for author, count in author_counts.most_common():
        print(f"{author}: {count} 次提交")

    # Per-commit output: the author line, a blank separator, then one
    # numstat line per file ("added<TAB>deleted<TAB>path").
    log_data = repo.git.log('--pretty=format:%an', '--numstat')

    author_stats = defaultdict(lambda: {'added': 0, 'deleted': 0})
    current_author = None
    for line in log_data.splitlines():
        if not line.strip():
            # Blank separator between a commit header and its numstat
            # block. Skip it: the original code assigned it to
            # current_author, attributing all line stats to "".
            continue
        if '\t' not in line:
            # Header line: the commit author's name (apply the same
            # aliasing as the commit-count pass for consistency).
            current_author = alias_map.get(line.strip(), line.strip())
        else:
            # maxsplit=2 keeps file paths containing tabs intact.
            added, deleted, _ = line.split('\t', 2)
            if added != '-':  # '-' marks binary files with no line counts
                author_stats[current_author]['added'] += int(added)
            if deleted != '-':
                author_stats[current_author]['deleted'] += int(deleted)

    for author, stats in author_stats.items():
        print(f"{author}: 添加 {stats['added']} 行, 删除 {stats['deleted']} 行")

commit_info()

#########################################
##### Count user issues and comments ####
#########################################
issue_counts = {}
comment_counts = {}

page = 1
while True:
    # Page through every issue (state=all includes closed ones). The
    # issues endpoint also returns pull requests; those are skipped below.
    url = f'https://api.github.com/repos/{owner}/{repo}/issues?state=all&per_page=100&page={page}'
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("请求失败,请检查网络连接或GitHub令牌。")
        break

    issues = response.json()
    if not issues:
        # An empty page means we are past the last page of results.
        break

    for issue in issues:
        if 'pull_request' in issue:
            # Pull requests appear in the issues API; ignore them here.
            continue

        user = issue['user']['login']
        issue_counts[user] = issue_counts.get(user, 0) + 1

        # Fetch this issue's comments. per_page=100 lifts the default
        # 30-comment cap; a failed fetch is skipped instead of crashing
        # (the original iterated the error payload, raising TypeError
        # on comment['user']).
        comments_response = requests.get(issue['comments_url'], headers=headers, params={'per_page': 100})
        if comments_response.status_code != 200:
            continue
        for comment in comments_response.json():
            commenter = comment['user']['login']
            comment_counts[commenter] = comment_counts.get(commenter, 0) + 1

    page += 1

# Print both tallies sorted by count, most active users first.
sorted_issue_counts = dict(sorted(issue_counts.items(), key=lambda item: item[1], reverse=True))
sorted_comment_counts = dict(sorted(comment_counts.items(), key=lambda item: item[1], reverse=True))
print("提问次数:")
for user, count in sorted_issue_counts.items():
    print(f"{user}: {count}")

print("\n回答次数:")
for user, count in sorted_comment_counts.items():
    print(f"{user}: {count}")
import subprocess
import argparse
import csv
import json
import os
import requests
from collections import Counter

def get_login_by_sha(sha, repo, token, cache):
    """Resolve a commit SHA to the author's GitHub login via the REST API.

    Every outcome — success, commit with no linked GitHub account, HTTP
    error, or network failure — is memoized in *cache*, so each SHA costs
    at most one API request. (The original cached successes only, so
    failed SHAs would be re-queried on every call.)

    Args:
        sha: Full commit SHA to look up.
        repo: "owner/name" repository slug.
        token: GitHub personal access token used for authentication.
        cache: Dict mapping SHA -> login (or None), shared across calls.

    Returns:
        The GitHub login string, or None when no login could be resolved.
    """
    if sha in cache:
        return cache[sha]

    url = f"https://api.github.com/repos/{repo}/commits/{sha}"
    headers = {"Authorization": f"token {token}"}
    login = None
    try:
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            # "author" is null when the commit email is not linked to
            # any GitHub account; guard before reading "login".
            author_obj = response.json().get("author")
            if author_obj:
                login = author_obj.get("login")
    except Exception as e:  # network errors, bad JSON — treat as "unknown"
        print(f"SHA查询异常({sha}): {e}")
    cache[sha] = login
    return login

def load_ignore_users(file_path):
    """Load the ignore list from a JSON file of usernames.

    Args:
        file_path: Path to a JSON file containing a list of names.

    Returns:
        A set of lowercased, whitespace-stripped names; an empty set when
        the file is missing, unreadable, or not valid JSON (best-effort,
        never raises for those cases).
    """
    if not os.path.exists(file_path):
        return set()
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return {str(u).strip().lower() for u in json.load(f)}
    except (OSError, ValueError, TypeError):
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # propagate; malformed JSON or I/O errors still mean "ignore nobody".
        return set()

def run_analysis():
    """Entry point: count commits per GitHub login for the local repository.

    Lists commit SHAs with `git log` (optionally bounded by --since/--until),
    resolves each SHA to a GitHub login through the REST API, filters out
    users from the ignore list, and writes a CSV sorted by commit count.
    Requires a full-history clone (checkout with fetch-depth: 0).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--token", required=True)
    parser.add_argument("-r", "--repo", required=True)
    parser.add_argument("--since")
    parser.add_argument("--until")
    parser.add_argument("--ignore", default="ignore_users.json")
    parser.add_argument("--output", default="commit_stats.csv")
    args = parser.parse_args()

    ignore_set = load_ignore_users(args.ignore)

    # Build the git command. Empty --since/--until values (as passed by a
    # workflow_dispatch run without inputs) are falsy and skipped.
    cmd = ["git", "log", "--pretty=%H"]
    if args.since:
        cmd.append(f"--since={args.since}")
    if args.until:
        cmd.append(f"--until={args.until}")

    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0:
        # Surface the failure in the Actions log instead of exiting
        # silently (the original bare `return` hid git errors entirely).
        print(f"git log 失败: {result.stderr.strip()}")
        return

    shas = [s.strip() for s in result.stdout.splitlines() if s.strip()]
    login_counts = Counter()
    sha_to_login_cache = {}

    print(f"检测到 {len(shas)} 个提交,正在追溯归属...")

    for sha in shas:
        login = get_login_by_sha(sha, args.repo, args.token, sha_to_login_cache)
        if login and login.lower() not in ignore_set:
            login_counts[login] += 1

    # Export as CSV, sorted by commit count, descending.
    sorted_stats = sorted(login_counts.items(), key=lambda x: x[1], reverse=True)
    with open(args.output, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["GitHub_Login", "Commits"])
        writer.writerows(sorted_stats)
    print(f"分析完成,导出至 {args.output}")

if __name__ == "__main__":
    run_analysis()
5 changes: 5 additions & 0 deletions ignore_users.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[
"Haidong Wang",
"donghaiwang",
"whd@hutb.edu.cn"
]