Skip to content

Commit 68bd7bc

Browse files
committed
Fluffy: Simplify and optimize ContentDb pruning
Simplify by shrinking the data radius by 5% and just deleting all content that no longer falls within that range. This gives roughly a 150x speed improvement on a 4GB database. The downside is that we cannot be certain that a single 5% radius drop will result in a similar storage drop.
1 parent fec1f4a commit 68bd7bc

File tree

3 files changed

+61
-178
lines changed

3 files changed

+61
-178
lines changed

fluffy/database/content_db.nim

+36-112
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,13 @@ declareCounter portal_pruning_counter,
3737
"Number of pruning events which occured during the node's uptime",
3838
labels = ["protocol_id"]
3939

40-
declareGauge portal_pruning_deleted_elements,
41-
"Number of elements deleted in the last pruning", labels = ["protocol_id"]
40+
declareGauge portal_pruning_used_size,
41+
"Total used size after the last pruning", labels = ["protocol_id"]
4242

43-
const
44-
contentDeletionFraction = 0.05 ## 5% of the content will be deleted when the
45-
## storage capacity is hit and radius gets adjusted.
43+
declareGauge portal_pruning_size,
44+
"Total size after the last pruning", labels = ["protocol_id"]
4645

4746
type
48-
RowInfo =
49-
tuple[contentId: array[32, byte], payloadLength: int64, distance: array[32, byte]]
50-
5147
ContentDB* = ref object
5248
backend: SqStoreRef
5349
kv: KvStoreRef
@@ -60,7 +56,6 @@ type
6056
vacuumStmt: SqliteStmt[NoParams, void]
6157
contentCountStmt: SqliteStmt[NoParams, int64]
6258
contentSizeStmt: SqliteStmt[NoParams, int64]
63-
getAllOrderedByDistanceStmt: SqliteStmt[array[32, byte], RowInfo]
6459
deleteOutOfRadiusStmt: SqliteStmt[(array[32, byte], array[32, byte]), void]
6560
largestDistanceStmt: SqliteStmt[array[32, byte], array[32, byte]]
6661

@@ -234,12 +229,6 @@ proc new*(
234229
let contentCountStmt =
235230
db.prepareStmt("SELECT COUNT(key) FROM kvstore;", NoParams, int64)[]
236231

237-
let getAllOrderedByDistanceStmt = db.prepareStmt(
238-
"SELECT key, length(value), xorDistance(?, key) as distance FROM kvstore ORDER BY distance DESC",
239-
array[32, byte],
240-
RowInfo,
241-
)[]
242-
243232
let deleteOutOfRadiusStmt = db.prepareStmt(
244233
"DELETE FROM kvstore WHERE isInRadius(?, key, ?) == 0",
245234
(array[32, byte], array[32, byte]),
@@ -261,7 +250,6 @@ proc new*(
261250
vacuumStmt: vacuumStmt,
262251
contentSizeStmt: contentSizeStmt,
263252
contentCountStmt: contentCountStmt,
264-
getAllOrderedByDistanceStmt: getAllOrderedByDistanceStmt,
265253
deleteOutOfRadiusStmt: deleteOutOfRadiusStmt,
266254
largestDistanceStmt: largestDistanceStmt,
267255
)
@@ -280,7 +268,6 @@ proc close*(db: ContentDB) =
280268
db.vacuumStmt.disposeSafe()
281269
db.contentCountStmt.disposeSafe()
282270
db.contentSizeStmt.disposeSafe()
283-
db.getAllOrderedByDistanceStmt.disposeSafe()
284271
db.deleteOutOfRadiusStmt.disposeSafe()
285272
db.largestDistanceStmt.disposeSafe()
286273
discard db.kv.close()
@@ -325,36 +312,6 @@ proc del*(db: ContentDB, key: ContentId) =
325312

326313
## Pruning related calls
327314

328-
proc deleteContentFraction*(
329-
db: ContentDB, target: UInt256, fraction: float64
330-
): (UInt256, int64, int64, int64) =
331-
## Deletes at most `fraction` percent of content from the database.
332-
## The content furthest from the provided `target` is deleted first.
333-
# TODO: The usage of `db.contentSize()` for the deletion calculation versus
334-
# `db.usedSize()` for the pruning threshold leads sometimes to some unexpected
335-
# results of how much content gets up deleted.
336-
doAssert(fraction > 0 and fraction < 1, "Deleted fraction should be > 0 and < 1")
337-
338-
let totalContentSize = db.contentSize()
339-
let bytesToDelete = int64(fraction * float64(totalContentSize))
340-
var deletedElements: int64 = 0
341-
342-
var ri: RowInfo
343-
var deletedBytes: int64 = 0
344-
let targetBytes = target.toBytesBE()
345-
for e in db.getAllOrderedByDistanceStmt.exec(targetBytes, ri):
346-
if deletedBytes + ri.payloadLength <= bytesToDelete:
347-
db.del(ri.contentId)
348-
deletedBytes = deletedBytes + ri.payloadLength
349-
inc deletedElements
350-
else:
351-
return (
352-
UInt256.fromBytesBE(ri.distance),
353-
deletedBytes,
354-
totalContentSize,
355-
deletedElements,
356-
)
357-
358315
proc reclaimSpace*(db: ContentDB): void =
359316
## Runs sqlite VACUUM commands which rebuilds the db, repacking it into a
360317
## minimal amount of disk space.
@@ -390,9 +347,33 @@ proc forcePrune*(db: ContentDB, localId: UInt256, radius: UInt256) =
390347
db.reclaimAndTruncate()
391348
notice "Finished database pruning"
392349

393-
proc putAndPrune*(db: ContentDB, key: ContentId, value: openArray[byte]): PutResult =
394-
db.put(key, value)
350+
proc prune*(db: ContentDB) =
351+
## Decrease the radius by `radiusDecreasePercentage` percent and prune the content
352+
## outside of the new radius.
353+
const radiusDecreasePercentage = 5
354+
# The amount here is somewhat arbitrary but should be big enough to not
355+
# constantly require pruning. If it is too small, it would adjust the radius
356+
# so often that the network might not be able to keep up with the current
357+
# radius of the node. At the same time, it would iterate over the content also
358+
# way too often. If the amount is too big it could render the node unresponsive
359+
# for too long.
360+
361+
let newRadius = db.dataRadius div 100 * (100 - radiusDecreasePercentage)
362+
363+
info "Pruning content outside of radius",
364+
oldRadius = db.dataRadius, newRadius = newRadius
365+
db.deleteContentOutOfRadius(db.localId, newRadius)
366+
db.dataRadius = newRadius
395367

368+
let usedSize = db.usedSize()
369+
let size = db.size()
370+
portal_pruning_counter.inc()
371+
portal_pruning_used_size.set(usedSize)
372+
portal_pruning_size.set(size)
373+
374+
info "Finished pruning content", usedSize, size, storageCapacity = db.storageCapacity
375+
376+
proc putAndPrune*(db: ContentDB, key: ContentId, value: openArray[byte]) =
396377
# The used size is used as pruning threshold. This means that the database
397378
# size will reach the size specified in db.storageCapacity and will stay
398379
# around that size throughout the node's lifetime, as after content deletion
@@ -404,55 +385,12 @@ proc putAndPrune*(db: ContentDB, key: ContentId, value: openArray[byte]): PutRes
404385
# static radius.
405386
# When not using the `forcePrune` functionality, pruning to the required
406387
# capacity will not be very effective and free pages will not be returned.
407-
let dbSize = db.usedSize()
408-
409-
if dbSize < int64(db.storageCapacity):
410-
return PutResult(kind: ContentStored)
411-
else:
412-
# Note:
413-
# An approach of a deleting a full fraction is chosen here, in an attempt
414-
# to not continuously require radius updates, which could have a negative
415-
# impact on the network. However this should be further investigated, as
416-
# doing a large fraction deletion could cause a temporary node performance
417-
# degradation. The `contentDeletionFraction` might need further tuning or
418-
# one could opt for a much more granular approach using sql statement
419-
# in the trend of:
420-
# "SELECT key FROM kvstore ORDER BY xorDistance(?, key) DESC LIMIT 1"
421-
# Potential adjusting the LIMIT for how many items require deletion.
422-
let (distanceOfFurthestElement, deletedBytes, totalContentSize, deletedElements) =
423-
db.deleteContentFraction(db.localId, contentDeletionFraction)
424-
425-
let deletedFraction = float64(deletedBytes) / float64(totalContentSize)
426-
info "Deleted content fraction", deletedBytes, deletedElements, deletedFraction
427-
428-
return PutResult(
429-
kind: DbPruned,
430-
distanceOfFurthestElement: distanceOfFurthestElement,
431-
deletedFraction: deletedFraction,
432-
deletedElements: deletedElements,
433-
)
388+
db.put(key, value)
434389

435-
proc adjustRadius(
436-
db: ContentDB, deletedFraction: float64, distanceOfFurthestElement: UInt256
437-
) =
438-
# Invert fraction as the UInt256 implementation does not support
439-
# multiplication by float
440-
let invertedFractionAsInt = int64(1.0 / deletedFraction)
441-
let scaledRadius = db.dataRadius div u256(invertedFractionAsInt)
442-
443-
# Choose a larger value to avoid the situation where the
444-
# `distanceOfFurthestElement is very close to the local id so that the local
445-
# radius would end up too small to accept any more data to the database.
446-
# If scaledRadius radius will be larger it will still contain all elements.
447-
let newRadius = max(scaledRadius, distanceOfFurthestElement)
448-
449-
info "Database radius adjusted",
450-
oldRadius = db.dataRadius, newRadius = newRadius, distanceOfFurthestElement
451-
452-
# Both scaledRadius and distanceOfFurthestElement are smaller than current
453-
# dataRadius, so the radius will constantly decrease through the node its
454-
# lifetime.
455-
db.dataRadius = newRadius
390+
while db.usedSize() >= int64(db.storageCapacity):
391+
# Note: This should typically only happen once, but if the content is not
392+
# distributed uniformly over the id range, it could happen multiple times.
393+
db.prune()
456394

457395
proc createGetHandler*(db: ContentDB): DbGetHandler =
458396
return (
@@ -477,21 +415,7 @@ proc createStoreHandler*(db: ContentDB, cfg: RadiusConfig): DbStoreHandler =
477415
of Dynamic:
478416
# In case of dynamic radius, the radius gets adjusted based on the
479417
# storage capacity and content gets pruned accordingly.
480-
let res = db.putAndPrune(contentId, content)
481-
if res.kind == DbPruned:
482-
portal_pruning_counter.inc()
483-
portal_pruning_deleted_elements.set(res.deletedElements.int64)
484-
485-
if res.deletedFraction > 0.0:
486-
db.adjustRadius(res.deletedFraction, res.distanceOfFurthestElement)
487-
else:
488-
# Note:
489-
# This can occur when the furthest content is bigger than the fraction
490-
# size. This is unlikely to happen as it would require either very
491-
# small storage capacity or a very small `contentDeletionFraction`
492-
# combined with some big content.
493-
info "Database pruning attempt resulted in no content deleted"
494-
return
418+
db.putAndPrune(contentId, content)
495419
of Static:
496420
# If the radius is static, it may never be adjusted, database capacity
497421
# is disabled and no pruning is ever done.

fluffy/tests/test_content_db.nim

+11-44
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Fluffy
2-
# Copyright (c) 2021-2024 Status Research & Development GmbH
2+
# Copyright (c) 2021-2025 Status Research & Development GmbH
33
# Licensed and distributed under either of
44
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
55
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
@@ -36,7 +36,7 @@ suite "Content Database":
3636
db.contains(key) == false
3737

3838
block:
39-
discard db.putAndPrune(key, [byte 0, 1, 2, 3])
39+
db.putAndPrune(key, [byte 0, 1, 2, 3])
4040

4141
var val = Opt.none(seq[byte])
4242
proc onData(data: openArray[byte]) =
@@ -67,11 +67,11 @@ suite "Content Database":
6767

6868
let numBytes = 10000
6969
let size1 = db.size()
70-
discard db.putAndPrune(u256(1), genByteSeq(numBytes))
70+
db.putAndPrune(u256(1), genByteSeq(numBytes))
7171
let size2 = db.size()
72-
discard db.putAndPrune(u256(2), genByteSeq(numBytes))
72+
db.putAndPrune(u256(2), genByteSeq(numBytes))
7373
let size3 = db.size()
74-
discard db.putAndPrune(u256(2), genByteSeq(numBytes))
74+
db.putAndPrune(u256(2), genByteSeq(numBytes))
7575
let size4 = db.size()
7676
let usedSize = db.usedSize()
7777

@@ -104,53 +104,20 @@ suite "Content Database":
104104
usedSize2 == size6
105105

106106
test "ContentDB pruning":
107-
# TODO: This test is extremely breakable when changing
108-
# `contentDeletionFraction` and/or the used test values.
109-
# Need to rework either this test, or the pruning mechanism, or probably
110-
# both.
111107
let
112-
storageCapacity = 100_000'u64
108+
storageCapacity = 1_000_000'u64 # 1MB
113109
db = ContentDB.new(
114110
"", storageCapacity, RadiusConfig(kind: Dynamic), testId, inMemory = true
115111
)
112+
numBytes = 1_000
113+
bytes = genByteSeq(numBytes)
116114

117-
furthestElement = u256(40)
118-
secondFurthest = u256(30)
119-
thirdFurthest = u256(20)
120-
121-
numBytes = 10_000
122-
pr1 = db.putAndPrune(u256(1), genByteSeq(numBytes))
123-
pr2 = db.putAndPrune(thirdFurthest, genByteSeq(numBytes))
124-
pr3 = db.putAndPrune(u256(3), genByteSeq(numBytes))
125-
pr4 = db.putAndPrune(u256(10), genByteSeq(numBytes))
126-
pr5 = db.putAndPrune(u256(5), genByteSeq(numBytes))
127-
pr6 = db.putAndPrune(u256(11), genByteSeq(numBytes))
128-
pr7 = db.putAndPrune(furthestElement, genByteSeq(2000))
129-
pr8 = db.putAndPrune(secondFurthest, genByteSeq(2000))
130-
pr9 = db.putAndPrune(u256(2), genByteSeq(numBytes))
131-
pr10 = db.putAndPrune(u256(4), genByteSeq(12000))
132-
133-
check:
134-
pr1.kind == ContentStored
135-
pr2.kind == ContentStored
136-
pr3.kind == ContentStored
137-
pr4.kind == ContentStored
138-
pr5.kind == ContentStored
139-
pr6.kind == ContentStored
140-
pr7.kind == ContentStored
141-
pr8.kind == ContentStored
142-
pr9.kind == ContentStored
143-
pr10.kind == DbPruned
115+
for i in 0 .. 800:
116+
let contentId = UInt256.high div 800 * i.u256
117+
db.putAndPrune(contentId, bytes)
144118

145119
check:
146-
pr10.deletedElements == 2
147120
uint64(db.usedSize()) < storageCapacity
148-
# With the current settings the 2 furthest elements will be deleted,
149-
# i.e key 30 and 40. The furthest non deleted one will have key 20.
150-
pr10.distanceOfFurthestElement == thirdFurthest
151-
not db.contains(furthestElement)
152-
not db.contains(secondFurthest)
153-
db.contains(thirdFurthest)
154121

155122
test "ContentDB force pruning":
156123
const

fluffy/tests/wire_protocol_tests/test_portal_wire_protocol.nim

+14-22
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
{.used.}
99

1010
import
11-
std/[algorithm, sequtils],
11+
std/sequtils,
1212
chronos,
1313
testutils/unittests,
1414
results,
@@ -386,10 +386,6 @@ procSuite "Portal Wire Protocol Tests":
386386
await node2.stopPortalProtocol()
387387

388388
asyncTest "Adjusting radius after hitting full database":
389-
# TODO: This test is extremely breakable when changing
390-
# `contentDeletionFraction` and/or the used test values.
391-
# Need to rework either this test, or the pruning mechanism, or probably
392-
# both.
393389
let
394390
node1 = initDiscoveryNode(rng, PrivateKey.random(rng[]), localAddress(20303))
395391

@@ -413,27 +409,23 @@ procSuite "Portal Wire Protocol Tests":
413409
)
414410

415411
let item = genByteSeq(10_000)
416-
var distances: seq[UInt256] = @[]
412+
var contentIds: seq[UInt256] = @[]
413+
let startRadius = db.dataRadius
417414

418415
for i in 0 ..< 40:
419-
proto1.storeContent(ByteList[2048].init(@[uint8(i)]), u256(i), item)
420-
distances.add(u256(i) xor proto1.localNode.id)
416+
let contentId = UInt256.high div 39 * i.u256
417+
proto1.storeContent(ByteList[2048].init(@[uint8(i)]), contentId, item)
418+
contentIds.add(contentId)
421419

422-
distances.sort(order = SortOrder.Descending)
420+
check db.dataRadius < startRadius
423421

424-
# With the selected db limit of 100_000 bytes and added elements of 10_000
425-
# bytes each, the two furthest elements should be prined, i.e index 0 and 1.
426-
# Index 2 should be still be in database and its distance should be <=
427-
# updated radius
428-
check:
429-
not db.contains((distances[0] xor proto1.localNode.id))
430-
not db.contains((distances[1] xor proto1.localNode.id))
431-
not db.contains((distances[2] xor proto1.localNode.id))
432-
db.contains((distances[3] xor proto1.localNode.id))
433-
# The radius has been updated and is lower than the maximum start value.
434-
proto1.dataRadius() < UInt256.high
435-
# Yet higher than or equal to the furthest non deleted element.
436-
proto1.dataRadius() >= distances[3]
422+
for contentId in contentIds:
423+
if db.dataRadius >= (contentId xor proto1.localNode.id):
424+
check db.contains(contentId)
425+
else:
426+
check not db.contains(contentId)
427+
428+
check db.usedSize() < int64(dbLimit)
437429

438430
await proto1.stop()
439431
await node1.closeWait()

0 commit comments

Comments
 (0)