Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Anti-spam github-action #2259

Draft
wants to merge 26 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
df712f8
[#2258] antispam workflow
GuillaumeDua Feb 23, 2025
ee8a2bc
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
a35f45a
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
4785d9d
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
f46e342
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
db2636f
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
1868ba3
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
9cb1704
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
3a229ab
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
54ef95e
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
93dee9a
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
72fd0ab
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
7dce9d2
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
5c805c2
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
b12a19f
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
492b15e
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
c3b11f6
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
294eea7
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
548bec6
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
265fcc1
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
558296f
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
813e81d
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
722a89d
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
d61ce3b
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
a733fac
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
6343bb3
[antispam] experimenting (WIP)
GuillaumeDua Mar 2, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .github/workflows/antispam.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Runs .github/workflows/scripts/antispam.js against newly opened or reopened
# issues and pull requests, so that suspicious ones can be commented on,
# labelled and/or closed.
name: Anti-Spam Issue & PR Checker

on:
  workflow_dispatch: # manual testing only
  issues:
    types: [opened, reopened]
  # `pull_request_target` (instead of `pull_request`) runs in the context of the
  # base repository, so GITHUB_TOKEN keeps write access for PRs coming from forks,
  # which is where spam PRs come from. The default checkout below is expected to
  # fetch the base branch only, so no code from the PR is executed.
  pull_request_target:
    types: [opened, reopened]

# The script comments on, labels and closes issues/PRs: it needs write access to both.
permissions:
  contents: read
  issues: write
  pull-requests: write

jobs:
  check-spam:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/github-script@v7
        env:
          # NOTE(review): `env.parentSHA` is not defined anywhere in this workflow,
          # so this presumably expands to an empty string - confirm it is still needed.
          SHA: '${{env.parentSHA}}'
        with:
          script: |
            const script = require('./.github/workflows/scripts/antispam.js')
            await script({ github, context })
298 changes: 298 additions & 0 deletions .github/workflows/scripts/antispam.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,298 @@
/**
 * Settings read by `when_suspicious` and `run`.
 *
 * Declared with `let` (not `const`) because `Testing.run` replaces the whole object.
 */
let configuration = {
    // Toggle read by the labelling step of `when_suspicious`.
    label_if_suspicious: true,
    // Toggle read by the commenting step of `when_suspicious`.
    comment_if_suspicious: true,
    // Toggle read by the closing step of `when_suspicious`.
    close_if_suspicious: true,
    // `run` reports "Not suspicious" while the number of failed checks is <= this value.
    suspicious_criteria_tolerated: 0,
};

/**
 * Builds a Markdown block-quote containing a table of author details.
 *
 * Missing (falsy) fields are rendered as empty cells.
 *
 * @param {object} args
 * @param {object} args.user - User object; the fields read are `email`, `login`,
 *                             `name`, `location` and `blog`.
 * @returns {Promise<string>} The Markdown report.
 */
async function make_information_report({ user }) {
    // This information might also be used to create a (pre-)report for spam to GitHub.
    // Each field is listed exactly once (the `location` row used to be emitted twice).
    return `> [!WARNING] About the author:
>
> | information | value |
> | ----------- | ----- |
> | email | ${user.email || '' } |
> | login | ${user.login || '' } |
> | name | ${user.name || '' } |
> | location | ${user.location || '' } |
> | blog | ${user.blog || '' } |
`
}

/**
 * Reacts to an issue/PR whose author failed too many checks.
 *
 * Always logs the warning comment that would be posted. Unless the workflow was
 * triggered manually (`workflow_dispatch`), it then comments on, labels and/or
 * closes the issue/PR, depending on the file-level `configuration` flags.
 *
 * @param {object} args
 * @param {object} args.github - Octokit client; `github.rest.issues.*` is used.
 * @param {object} args.context - Event context; `eventName`, `repo` and `payload` are read.
 * @param {Array<{reason: string}>} args.failed_checks - Checks that did not pass.
 * @returns {Promise<void>}
 */
async function when_suspicious({ github, context, failed_checks }) {

    // REFACTO: might want to use a confidence score (how suspicious it is), then react on that

    const reasons = failed_checks.map(check => `> - ${check.reason}`).join("\n");
    const commentBody = `> [!WARNING] This issue/PR has been automatically flagged as [suspicious] as it might not meet contribution requirements.
> Please read our contribution guide before submitting.
>
> Reason(s):
>
${reasons}
`;

    console.log("Body of the produced comment:\n", commentBody);

    if (context.eventName === 'workflow_dispatch') { // so we can test manually
        return;
    }

    const { owner, repo } = context.repo;
    // `issues` event payloads carry the number in `payload.issue` only, while
    // pull-request payloads expose it both at top level and in `payload.pull_request`.
    const payload = context.payload ?? {};
    const issueNumber = payload.issue?.number ?? payload.pull_request?.number ?? payload.number;

    if (configuration.comment_if_suspicious) {
        await github.rest.issues.createComment({
            owner,
            repo,
            issue_number: issueNumber,
            body: `${commentBody}`
        });
    }
    // Label only when the option is enabled (the condition used to be negated).
    if (configuration.label_if_suspicious) {
        await github.rest.issues.addLabels({
            owner,
            repo,
            issue_number: issueNumber,
            labels: ["suspicious"]
        });
    }
    if (configuration.close_if_suspicious) {
        await github.rest.issues.update({
            owner,
            repo,
            issue_number: issueNumber,
            state: "closed"
        });
    }
}

/**
 * A single criterion: a predicate paired with the reason reported for it.
 */
class Check {
    /**
     * @param {object} args
     * @param {Function} args.predicate - Called without arguments by `pass()`; its result may be a promise.
     * @param {string} args.reason - Text logged alongside the outcome.
     */
    constructor({ predicate, reason }) {
        this.predicate = predicate;
        this.reason = reason;
    }

    /**
     * Evaluates the predicate and logs the outcome.
     *
     * A non-boolean result is reported on stderr but still returned as-is.
     *
     * @returns {Promise<*>} Whatever the predicate produced.
     */
    async pass() {
        const outcome = await this.predicate();
        const isBoolean = typeof outcome === "boolean";
        if (!isBoolean) {
            console.error("Check: invalid argument: not a predicate");
        }

        const verdict = outcome ? "PASSED" : "FAILED";
        console.debug("- check: ", verdict, " => ", this.reason);

        return outcome;
    }
}

/**
 * Evaluates the author of an issue/PR against a list of checks and, when more
 * checks fail than `configuration.suspicious_criteria_tolerated` allows, hands
 * the failed checks over to `when_suspicious`.
 *
 * Relies on the file-level `configuration`, `Check`, `when_suspicious` and
 * `make_information_report`.
 *
 * @param {object} args
 * @param {object} args.github - Octokit client (`rest.users`, `rest.activity`).
 * @param {object} args.context - Event context; `actor`, `eventName` and
 *                                `payload.issue` / `payload.pull_request` are read.
 * @returns {Promise<void>} Settles once every reaction has completed.
 * @throws {Error} When the payload holds neither an issue nor a pull request.
 */
async function run({ github, context }) {

    const username = context.actor;
    const { data: user } = await github.rest.users.getByUsername({ username });

    const { payload } = context;
    const issue_or_pr = payload.issue || payload.pull_request;
    if (!issue_or_pr) {
        throw new Error("Only supports issues and PRs");
    }

    console.log('Checking', { user: username, title: issue_or_pr.title });

    // WARNING: Depending on the time of day, event latency can be anywhere from 30s to 6h. (source: https://octokit.github.io/rest.js/v21/)
    const { data: events } = await github.rest.activity.listEventsForAuthenticatedUser({
        username,
        per_page: 1
    });
    const isAuthorOnlyContributionOnGH = events.length === 0;

    // The account counts as "recent" when created less than two hours before the issue/PR.
    // `new Date(...)` is required here: `Date.parse` returns a number, which has no date methods.
    const recentAccountWindowMs = 2 * 60 * 60 * 1000;
    const openedAt = new Date(issue_or_pr.created_at).getTime();
    const accountCreatedAt = new Date(user.created_at).getTime();
    const wasAuthorRecentlyCreated = accountCreatedAt >= openedAt - recentAccountWindowMs;

    // Measure the title and the body themselves (the body may be null when left empty).
    const minimumTextLength = 20;
    const isTitleOrBodyTooShort = context.eventName !== 'workflow_dispatch' // issues or pull_request only
        && ((issue_or_pr.title ?? '').trim().length < minimumTextLength
         || (issue_or_pr.body ?? '').trim().length < minimumTextLength);

    const checks = [
        new Check({
            predicate: () => !wasAuthorRecentlyCreated,
            reason: "Author account was recently created"
        }),
        new Check({
            predicate: () => !isAuthorOnlyContributionOnGH,
            reason: "Author first contribution to any GitHub project"
        }),
        new Check({
            // Fails only when the author has neither followers nor followings, as the reason states.
            predicate: () => user.followers !== 0 || user.following !== 0,
            reason: "Author has no relationships"
        }),
        new Check({
            // Fails only when the author has neither a public repo nor a public gist.
            predicate: () => user.public_repos !== 0 || user.public_gists !== 0,
            reason: "Author has no public repo/gist"
        }),
        new Check({
            predicate: () => !isTitleOrBodyTooShort,
            reason: "Issue/PR title or body too short"
        }),
    ];

    // IDEA: mandatory checks -> if any fails, then reject
    //       for other checks
    //          use weights/factors instead of booleans
    //          compute a confidence score to check against a threshold => if below, then reject

    const results = await Promise.all(
        checks.map(async (check) => ({
            check,
            passed: await check.pass(),
        }))
    );
    const failed_checks = results
        .filter(({ passed }) => !passed)
        .map(({ check }) => check);

    console.log("Checks: ", {
        passed: checks.length - failed_checks.length,
        failed: failed_checks.length
    });

    if (failed_checks.length <= configuration.suspicious_criteria_tolerated) {
        console.info("Not suspicious");
        return;
    }

    // Awaited so that API failures reject `run` instead of becoming unhandled rejections.
    await when_suspicious({ github, context, failed_checks });

    const user_information_as_comment = await make_information_report({ user });
    // do stuff with user_information_as_comment
    console.log("user_information_as_comment", user_information_as_comment);
}

/**
 * Manual-testing harness: replays the checks of `run` against a fixed list of
 * existing issues/PRs, with every reaction (comment, label, close) disabled.
 */
class Testing {

    static enabled = true;

    /**
     * Splits a GitHub issue/PR URL into its components.
     *
     * @param {object} args
     * @param {string} args.url
     * @returns {{owner: string, repo: string, type: string, number: number} | null}
     *          `null` when the URL is not an issue/PR URL; `type` is "issues" or "pull".
     */
    static #parseGitHubUrl({ url }) {
        const match = url.match(/github\.com\/([^\/]+)\/([^\/]+)\/(issues|pull)\/(\d+)/);
        if (!match || match.length !== 5) {
            return null;
        }

        const [, owner, repo, type, number] = match;
        return { owner, repo, type, number: Number.parseInt(number, 10) };
    }

    /**
     * Fetches an issue/PR and adapts it to the subset of the event context used by `run`.
     *
     * @param {object} args
     * @param {string} args.url - Issue or PR URL.
     * @param {object} args.github - Octokit client.
     * @returns {Promise<object>} A context-like object (`actor`, `sender`, `eventName`, `payload`, `repo`).
     * @throws {Error} When the URL is invalid or the issue/PR cannot be fetched.
     */
    static async #getContext({ url, github }) {

        const parsed_url = Testing.#parseGitHubUrl({ url });
        if (!parsed_url) {
            throw new Error(`Invalid GitHub issue/PR URL: [${url}]`);
        }

        const { owner, repo, type, number } = parsed_url;
        const isIssue = type === "issues";
        let response;

        try {
            response = isIssue
                ? await github.rest.issues.get({ owner, repo, issue_number: number })
                : await github.rest.pulls.get({ owner, repo, pull_number: number });
        }
        catch (error) {
            // Spell the kind out ("pull".slice(0, -1) would read "pul") and keep the original error.
            throw new Error(
                `Failed to fetch ${isIssue ? "issue" : "pull request"} #${number}: ${error.message}`,
                { cause: error }
            );
        }

        // make context adapter => could perform it upstream with graphql
        const { data } = response;
        const payload_content = {
            number,
            title: data.title,
            body: data.body,
            // Use the real creation date so account-age checks are evaluated as they
            // would have been when the issue/PR was opened.
            created_at: data.created_at
        };
        return {
            actor: data.user.login,
            sender: data.user,
            eventName: isIssue ? 'issues' : 'pull_request',
            payload: isIssue ? { issue: payload_content } : { pull_request: payload_content },
            // The fetched data has no top-level `repo` field: rebuild the { owner, repo } pair from the URL.
            repo: { owner, repo }
        };
    }

    static #cases = {

        legits: [
            'https://github.com/isocpp/CppCoreGuidelines/pull/2258',
            'https://github.com/isocpp/CppCoreGuidelines/pull/2259'
        ],
        spams: [
            'https://github.com/isocpp/CppCoreGuidelines/pull/2257',
            'https://github.com/isocpp/CppCoreGuidelines/pull/2241',
            'https://github.com/isocpp/CppCoreGuidelines/pull/2254',
            'https://github.com/isocpp/CppCoreGuidelines/pull/2252',
            'https://github.com/isocpp/CppCoreGuidelines/issues/2249',
            'https://github.com/isocpp/CppCoreGuidelines/issues/2238',
            'https://github.com/isocpp/CppCoreGuidelines/issues/2225',
            'https://github.com/isocpp/CppCoreGuidelines/issues/2255',
        ]
    }

    /**
     * Runs the checks against every known spam case, with all reactions disabled.
     *
     * @param {object} args
     * @param {object} args.github - Octokit client.
     * @returns {Promise<void>} Settles once every case has been processed.
     */
    static async run({ github }) {
        console.log('Testing enabled')

        Testing.enabled = true;
        configuration = {
            label_if_suspicious: false,
            comment_if_suspicious: false,
            close_if_suspicious: false,
            suspicious_criteria_tolerated: 0
        }

        // IDEA: run N-by-N to limit memory bloat
        const testing_contexts = await Promise.all(
            Testing.#cases.spams.map((url) => Testing.#getContext({ url, github }))
        );
        // `forEach` would drop the promises returned by the async callback; collect and await them.
        await Promise.all(
            testing_contexts.map((context) => run({ github, context }))
        );
    }
}

module.exports = async ({ github, context }) => {

if (context.eventName !== 'workflow_dispatch')
return await run({ github, context });

return await Testing.run({ github });
};

/*
WIP:
A few more experiments: this CI job can now run against an arbitrary issue/PR URL,
which makes it easier to test against a list of known cases. See https://github.com/GuillaumeDua/CppCoreGuidelines/actions/runs/13616115173/job/38059374996#step:3:1547
*/