Skip to content

Commit 006c833

Browse files
authored
Ensure no two advisories represent the same issue (#182)
It dawns on me that the easiest way we might screw up the publication of an advisory is if we accidentally create a new JLSEC id that is representing the same alias or upstream advisory. This would be quite easy to do with multiple concurrent pull requests. We should probably tweak some toggles to make sure that CI is always run against a fresh merge with a queue or somesuch.
1 parent 07d7603 commit 006c833

File tree

1 file changed

+19
-2
lines changed

1 file changed

+19
-2
lines changed

scripts/assign_ids_and_dates.jl

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@ using SecurityAdvisories
22
using Dates: Dates, DateTime
33
using TimeZones: TimeZones, ZonedDateTime
44

5+
"""
    union_unique!(existing, additions)

Add every element of `additions` to `existing`, one at a time, and return `existing`.

Each element is checked with `in` immediately before it is pushed, so a value that is
already in `existing` (or that appears twice within `additions`) triggers an error.
Elements pushed before the offending value remain in `existing`.

# Throws
- `ArgumentError`: if an element of `additions` is already present in `existing`.
"""
function union_unique!(existing, additions)
    foreach(additions) do addition
        if addition in existing
            throw(ArgumentError("value $addition already exists in the $(typeof(existing))"))
        end
        push!(existing, addition)
    end
    return existing
end
12+
513
function main()
614
all_advisories_path = joinpath(@__DIR__, "..", "advisories")
715
published_advisories_path = joinpath(@__DIR__, "..", "advisories", "published")
@@ -16,15 +24,19 @@ function main()
1624
last_id = max(last_id, something(tryparse(Int, chopprefix(chopsuffix(file, ".md"), prefix)), 0))
1725
end
1826

19-
# Then go through the published advisories and ensure all IDs are assigned
20-
# and that the dates accurately match the commit dates
27+
# Then go through the published advisories and ensure all IDs are assigned,
28+
# that the dates accurately match the commit dates, and there are no
29+
# duplicate aliases/upstreams in the database
30+
ids = Set{String}()
2131
n_updated = 0
2232
now = Dates.now(Dates.UTC)
2333
for (root, _, files) in walkdir(published_advisories_path), file in files
2434
path = joinpath(root, file)
2535
SecurityAdvisories.is_jlsec_advisory_path(path) || continue
2636
advisory = SecurityAdvisories.parsefile(path)
2737
updated = false
38+
39+
# First assign ids and determine the modified and published timestamps
2840
if startswith(advisory.id, string(SecurityAdvisories.PREFIX, "-0000-"))
2941
last_id += 1
3042
advisory.id = string(SecurityAdvisories.PREFIX, "-", year, "-", last_id)
@@ -40,6 +52,8 @@ function main()
4052
git_published = readchomp(`git log -1 --format="%cd" --date=iso-strict --diff-filter=A -- $path`)
4153
published = isempty(git_published) ? modified : DateTime(ZonedDateTime(git_published), Dates.UTC)
4254
end
55+
56+
# Now update the timestamps (if we need to)
4357
if something(advisory.withdrawn, typemin(DateTime)) > advisory.modified
4458
# If the withdrawn date is _after_ the previously stored modified time, then it's a new modification
4559
# The effective time of the withdrawal will be upon publication to this repo — the new modified time
@@ -59,6 +73,9 @@ function main()
5973
updated = true
6074
end
6175

76+
# Ensure this advisory isn't already represented in the database in any manner
77+
union_unique!(ids, [advisory.id; advisory.aliases; advisory.upstream])
78+
6279
if updated
6380
# TODO: we could do better by applying a git diff that only includes the semantically meaningful parts
6481
open(path, "w") do io

0 commit comments

Comments
 (0)