diff --git a/src/application/core/src/Message.ts b/src/application/core/src/Message.ts index b856c4e0..5a63e020 100644 --- a/src/application/core/src/Message.ts +++ b/src/application/core/src/Message.ts @@ -67,7 +67,7 @@ export class Message extends Base { await this.state.database.guildRecordOneDay.upsertAll(query); } - public async includeInviteLink(content: string) { + public async includeInviteLink(content: string): Promise { const { inviteLinks, normalUrls } = await findUrls(content); for (const url of normalUrls) { @@ -96,6 +96,6 @@ export class Message extends Base { } } - return inviteLinks; + return Array.from(new Set(inviteLinks)); } } diff --git a/src/domain/service/src/url.test.ts b/src/domain/service/src/url.test.ts index 1f58802b..4716f2c1 100644 --- a/src/domain/service/src/url.test.ts +++ b/src/domain/service/src/url.test.ts @@ -27,6 +27,12 @@ describe("url.ts", () => { // normal urls "https://google.com", "https://www.youtube.com/watch?v=AICOs2mNTrw", + + // duplicate + "https://discord.com/invite/foo", + "https://discord.com/invite/foo", + "https://example.com", + "https://example.com", ].join("\n"), ); @@ -55,17 +61,17 @@ describe("url.ts", () => { "https://discord.com/invite/fuga", "http://\\canary.diScordApP.com\\google.com⁂⌘∮/..\\/invite\\/youtube.com‖∠∇\\../twitter.com⁑∋〻\\../\\../fuga", "https://\\canary.diScordApP.com\\google.com⁂⌘∮/..\\/invite\\/youtube.com‖∠∇\\../twitter.com⁑∋〻\\../\\../fuga", + + "https://discord.com/invite/foo", ]); expect(urls.normalUrls).toEqual([ - "https://discord.com/", - "https://discord.com/terms", - "https://discord.com/terms/terms-of-service-april-2024", "https://discord.com/", "https://discord.com/terms", "https://discord.com/terms/terms-of-service-april-2024", "https://google.com", "https://www.youtube.com/watch?v=AICOs2mNTrw", + "https://example.com", ]); }); }); diff --git a/src/domain/service/src/url.ts b/src/domain/service/src/url.ts index ced55966..5ebc59a5 100644 --- a/src/domain/service/src/url.ts +++ 
b/src/domain/service/src/url.ts @@ -8,14 +8,14 @@ export type FindUrls = { export const URL_REGEXP = /https?:\/\/[\w!?/+\-_~=;.,*&@#$%()'[\]]+/im; -export const URL_REGEXP_DISCORD_GG = /(?:https?:\/\/)?discord\.gg\/[a-zA-Z0-9_-]+/im; +export const URL_REGEXP_DISCORD_GG = /(?:(https?|discord):\/\/)?discord\.gg\/[a-zA-Z0-9_-]+/im; export const URL_REGEXP_DISCORD_COM = - /(?:https?:\/\/)?\S*(?:discord\.com|discordapp\.com)\S*invite\S*[a-zA-Z0-9_-]+/im; + /(?:(https?|discord):\/\/)?\S*(?:discord\.com|discordapp\.com)\S*invite\S*[a-zA-Z0-9_-]+/im; export function findUrlsSync(content: string): FindUrls { - const inviteLinks: string[] = []; - const normalUrls: string[] = []; + const inviteLinks = new Set(); + const normalUrls = new Set(); const lines = content.split("\n"); for (const line of lines) { @@ -38,16 +38,16 @@ export function findUrlsSync(content: string): FindUrls { const isUrl = new RegExp(URL_REGEXP.source, "im").test(url); if (isInviteLink) { - inviteLinks.push(url); + inviteLinks.add(url); } else if (isUrl) { - normalUrls.push(url); + normalUrls.add(url); } } } return { - inviteLinks, - normalUrls, + inviteLinks: Array.from(inviteLinks), + normalUrls: Array.from(normalUrls), }; } diff --git a/src/infrastructure/http/src/invite.test.ts b/src/infrastructure/http/src/invite.test.ts index a2a610eb..faafa547 100644 --- a/src/infrastructure/http/src/invite.test.ts +++ b/src/infrastructure/http/src/invite.test.ts @@ -8,17 +8,32 @@ import type { MockedFunction } from "vitest"; import { LocalAddressError } from "./Error/LocalAddressError"; import { RedirectError } from "./Error/RedirectError"; -import { DISCORD_INVITE_LINK_START, isInviteLink, isUsedCf } from "./invite"; +import { DISCORD_DOMAINS, INVITE_PROTOCOL, isInviteLink, isUsedCf } from "./invite"; import { safeFetch } from "./safefetch"; +const DISCORD_INVITE_LINK_START = [ + "https://discord.com/invite/", + "https://ptb.discord.com/invite/", + "https://canary.discord.com/invite/", + 
"discord://discord.com/invite/", + "discord://ptb.discord.com/invite/", + "discord://canary.discord.com/invite/", +]; + const safeFetchMock = safeFetch as MockedFunction; -function fakeResponse(responseUrl: string, status = 200, body = ""): Response { +function fakeResponse( + responseUrl: string, + status = 200, + body = "", + extraHeaders: Record = {}, +): Response { const obj = { url: responseUrl, status, + headers: new Headers(extraHeaders), clone() { - return fakeResponse(responseUrl, status, body); + return fakeResponse(responseUrl, status, body, extraHeaders); }, async text() { return body; @@ -145,3 +160,103 @@ describe("isInviteLink", () => { expect((callInit?.headers as Record)?.["User-Agent"]).toBe("Mozilla/5.0"); }); }); + +// ─── DISCORD_INVITE_LINK_START equivalence ─────────────────────────────────── +// +// Verifies that the URL-parsing approach (isDiscordInviteLink) detects exactly +// the same URLs as the old DISCORD_INVITE_LINK_START.some(v => url.startsWith(v)) +// approach, for both the final response URL and the Location response header. 
+ +describe("isInviteLink — DISCORD_INVITE_LINK_START equivalence", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + // ── resUrl path ───────────────────────────────────────────────────────────── + + describe("resUrl", () => { + it.each(DISCORD_INVITE_LINK_START)("content: true — %s + invite code", async (prefix) => { + safeFetchMock.mockResolvedValueOnce(fakeResponse(`${prefix}abc123`)); + const result = await isInviteLink("https://shortener.example.com/xyz"); + expect(result).not.toBeInstanceOf(Error); + expect((result as { content: boolean }).content).toBe(true); + }); + + it.each([ + // no trailing slash → pathname stays /invite and does not match /invite/ + "https://discord.com/invite", + "https://ptb.discord.com/invite", + "https://canary.discord.com/invite", + "discord://discord.com/invite", + // non-invite path + "https://discord.com/channels/123456/789012", + "https://discord.com/", + // subdomain spoofing + "https://evil.discord.com/invite/abc123", + // domain spoofing + "https://discord.com.evil.com/invite/abc123", + // unrelated domain + "https://example.com/invite/abc123", + ])("content: false — %s", async (nonInviteUrl) => { + safeFetchMock.mockResolvedValueOnce(fakeResponse(nonInviteUrl)); + const result = await isInviteLink("https://shortener.example.com/xyz"); + expect(result).not.toBeInstanceOf(Error); + expect((result as { content: boolean }).content).toBe(false); + }); + }); + + // ── location header path ──────────────────────────────────────────────────── + + describe("location header", () => { + it.each(DISCORD_INVITE_LINK_START)( + "content: true — Location: %s + invite code", + async (prefix) => { + safeFetchMock.mockResolvedValueOnce( + fakeResponse("https://example.com/", 200, "", { location: `${prefix}abc123` }), + ); + const result = await isInviteLink("https://shortener.example.com/xyz"); + expect(result).not.toBeInstanceOf(Error); + expect((result as { content: boolean }).content).toBe(true); + }, + ); + + it.each([ + 
"https://discord.com/invite", + "https://evil.discord.com/invite/abc123", + "https://discord.com.evil.com/invite/abc123", + "https://example.com/invite/abc123", + ])("content: false — Location: %s", async (location) => { + safeFetchMock.mockResolvedValueOnce( + fakeResponse("https://example.com/", 200, "", { location }), + ); + const result = await isInviteLink("https://shortener.example.com/xyz"); + expect(result).not.toBeInstanceOf(Error); + expect((result as { content: boolean }).content).toBe(false); + }); + + it("content: false when Location header is absent", async () => { + safeFetchMock.mockResolvedValueOnce(fakeResponse("https://example.com/page")); + const result = await isInviteLink("https://shortener.example.com/xyz"); + expect(result).not.toBeInstanceOf(Error); + expect((result as { content: boolean }).content).toBe(false); + }); + }); + + // ── INVITE_PROTOCOL / DISCORD_DOMAINS exports ──────────────────────────────── + // Verifies that all protocols and hosts present in the legacy DISCORD_INVITE_LINK_START + // are covered by their respective constants without omission. 
+ + it("INVITE_PROTOCOL covers all protocols used in DISCORD_INVITE_LINK_START", () => { + const usedProtocols = new Set(DISCORD_INVITE_LINK_START.map((url) => new URL(url).protocol)); + for (const protocol of usedProtocols) { + expect(INVITE_PROTOCOL).toContain(protocol); + } + }); + + it("DISCORD_DOMAINS covers all hosts used in DISCORD_INVITE_LINK_START", () => { + const usedHosts = new Set(DISCORD_INVITE_LINK_START.map((url) => new URL(url).hostname)); + for (const host of usedHosts) { + expect(DISCORD_DOMAINS).toContain(host); + } + }); +}); diff --git a/src/infrastructure/http/src/invite.ts b/src/infrastructure/http/src/invite.ts index 8afcc8c8..b3266963 100644 --- a/src/infrastructure/http/src/invite.ts +++ b/src/infrastructure/http/src/invite.ts @@ -8,11 +8,23 @@ export type IsInviteLink = { isUsedCf: boolean; }; -export const DISCORD_INVITE_LINK_START = [ - "https://discord.com/invite/", - "https://ptb.discord.com/invite/", - "https://canary.discord.com/invite/", -]; +export const DISCORD_DOMAINS = ["discord.com", "ptb.discord.com", "canary.discord.com"]; + +export const INVITE_PROTOCOL = ["discord:", "http:", "https:"]; + +async function isDiscordInviteLink(url: string | URL) { + const parsedUrl = URL.parse(url); + + if (parsedUrl === null) { + return false; + } + + return ( + INVITE_PROTOCOL.includes(parsedUrl.protocol) && + DISCORD_DOMAINS.includes(parsedUrl.host) && + parsedUrl.pathname.startsWith("/invite/") + ); +} // this function is fucking shit. // I will fix it someday. 
@@ -39,21 +51,30 @@ export async function isUsedCf(res: Response) { export async function isInviteLink( url: string, ): Promise { - const response = await safeFetch(url as SafeUrl, { - method: "GET", - headers: { - "User-Agent": "Mozilla/5.0", + const response = await safeFetch( + url as SafeUrl, + { + method: "GET", + headers: { + "User-Agent": "Mozilla/5.0", + }, + }, + { + detectDiscordProtocol: true, }, - }); + ); if (response instanceof Error) { return response; } const resUrl = response.url; + const location = response.headers.get("location"); return { - content: DISCORD_INVITE_LINK_START.some((value) => resUrl.startsWith(value)), + content: + (await isDiscordInviteLink(resUrl)) || + (location !== null && (await isDiscordInviteLink(location))), isUsedCf: await isUsedCf(response), }; } diff --git a/src/infrastructure/http/src/safefetch.test.ts b/src/infrastructure/http/src/safefetch.test.ts index 4e8b1851..4ab9c349 100644 --- a/src/infrastructure/http/src/safefetch.test.ts +++ b/src/infrastructure/http/src/safefetch.test.ts @@ -1,8 +1,12 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -vi.mock("./url", () => ({ - isLocalUrl: vi.fn(), -})); +vi.mock("./url", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + isLocalUrl: vi.fn(), + }; +}); vi.mock("./size", () => ({ isValidSize: vi.fn(), })); @@ -15,7 +19,7 @@ import { InvalidDomainError } from "./Error/InvalidDomainError"; import { LocalAddressError } from "./Error/LocalAddressError"; import { RedirectError } from "./Error/RedirectError"; import { DEFAULT_MAX_REDIRECT } from "./redirect"; -import { DISCORD_DOMAINS, safeFetch, safeFetchForDiscord } from "./safefetch"; +import { ALLOW_DISCORD_DOMAINS, safeFetch, safeFetchForDiscord } from "./safefetch"; import type { SafeUrl } from "./safeurl"; import { isValidSize } from "./size"; import { isLocalUrl } from "./url"; @@ -58,7 +62,7 @@ describe("safeFetchForDiscord", () => { 
expect(fetchMock).not.toHaveBeenCalled(); }); - it.each(DISCORD_DOMAINS)("allows the Discord domain '%s'", async (domain) => { + it.each(ALLOW_DISCORD_DOMAINS)("allows the Discord domain '%s'", async (domain) => { fetchMock.mockResolvedValueOnce(ok()); const result = await safeFetchForDiscord(url(`https://${domain}/api`)); expect(result).toBeInstanceOf(Response); diff --git a/src/infrastructure/http/src/safefetch.ts b/src/infrastructure/http/src/safefetch.ts index 6fe75e80..581d5ae0 100644 --- a/src/infrastructure/http/src/safefetch.ts +++ b/src/infrastructure/http/src/safefetch.ts @@ -7,9 +7,13 @@ import { DEFAULT_MAX_REDIRECT } from "./redirect"; import type { SafeUrl } from "./safeurl"; import { isValidSize } from "./size"; import { DEFAULT_TIMEOUT, DISCORD_TIMEOUT } from "./timeout"; -import { isLocalUrl } from "./url"; +import { isHttpProtocol, isLocalUrl } from "./url"; -export const DISCORD_DOMAINS = ["discord.com", "discordapp.com", "discord.gg"]; +export type SafeFetchOptions = { + detectDiscordProtocol?: boolean; +}; + +export const ALLOW_DISCORD_DOMAINS = ["discord.com", "discordapp.com", "discord.gg"]; export async function safeFetchForDiscord( input: SafeUrl, @@ -18,7 +22,7 @@ export async function safeFetchForDiscord( if (await isLocalUrl(input)) return new LocalAddressError(`${input} is local address.`); const hostname = new URL(input).hostname; - if (!DISCORD_DOMAINS.includes(hostname)) { + if (!ALLOW_DISCORD_DOMAINS.includes(hostname)) { return new InvalidDomainError(`${hostname} is not discord domain.`); } @@ -28,6 +32,7 @@ export async function safeFetchForDiscord( export async function safeFetch( input: SafeUrl, init?: RequestInit, + options?: SafeFetchOptions, ): Promise { let reqUrl: string = input; let currentInit = init; @@ -61,7 +66,12 @@ export async function safeFetch( return new HeaderError(`${location} is invalid.`); } - if (url.protocol !== "http:" && url.protocol !== "https:") { + /* URL#protocol is the scheme followed by a single ":" (here "discord:") and never contains "//". On a discord: redirect target, return the redirect response unfollowed so the caller can read its Location header. */ + if (options?.detectDiscordProtocol && url.protocol === 
"discord:") { + return response; + } + + if (!(await isHttpProtocol(url))) { return new HeaderError(`${url.protocol} is not allowed.`); } diff --git a/src/infrastructure/http/src/url.ts b/src/infrastructure/http/src/url.ts index 3cc318be..26742857 100644 --- a/src/infrastructure/http/src/url.ts +++ b/src/infrastructure/http/src/url.ts @@ -62,3 +62,10 @@ export async function isLocalUrl(url: string): Promise { return await isLocalHostname(hostname); } + +/** Resolves to true only when `url` parses and its scheme is http or https; input that fails to parse resolves to false. */ +export async function isHttpProtocol(url: string | URL): Promise<boolean> { + const protocol = URL.parse(url)?.protocol; + + return protocol === "http:" || protocol === "https:"; +}