Skip to content

Commit 65854f8

Browse files
committed
Attempt to replace GraphMap’s Map with a CLruCache
Fixes issue trustification#1218
1 parent c10d89a commit 65854f8

File tree

6 files changed

+78
-21
lines changed

6 files changed

+78
-21
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ bytes = "1.5"
5454
bytesize = "1.3"
5555
chrono = { version = "0.4.35", default-features = false }
5656
clap = "4"
57+
clru = "0.6.2"
5758
concat-idents = "1"
5859
cpe = "0.1.5"
5960
criterion = "0.5.1"

modules/analysis/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ actix-web = { workspace = true }
1515
anyhow = { workspace = true }
1616
cpe = { workspace = true }
1717
log = { workspace = true }
18+
clru = { workspace = true }
1819
parking_lot = { workspace = true }
1920
petgraph = { workspace = true }
2021
sea-orm = { workspace = true }

modules/analysis/src/model.rs

Lines changed: 64 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
use petgraph::Graph;
22
use serde::Serialize;
33
use std::{
4-
collections::HashMap,
54
fmt,
65
ops::{Deref, DerefMut},
76
};
7+
use std::hash::RandomState;
8+
use std::num::NonZeroUsize;
9+
use clru::{CLruCache, CLruCacheConfig, WeightScale};
10+
use petgraph::graph::Edge;
811
use trustify_common::{cpe::Cpe, purl::Purl};
912
use trustify_entity::relationship::Relationship;
1013
use utoipa::ToSchema;
@@ -35,7 +38,39 @@ pub struct PackageNode {
3538
pub document_id: String,
3639
pub product_name: String,
3740
pub product_version: String,
41+
pub approximate_memory_size: usize,
3842
}
43+
44+
impl PackageNode {
45+
pub(crate) fn set_approximate_memory_size(&self) -> PackageNode {
46+
47+
// Is there a better way to do this? I think this will under-estimate the memory size
48+
let approximate_memory_size =
49+
size_of::<PackageNode>() +
50+
self.sbom_id.len() +
51+
self.node_id.len() +
52+
self.purl.iter().fold(0, |acc, purl|
53+
// use the json string length as an approximation of the memory size
54+
acc + serde_json::to_string(purl).unwrap_or_else(|_| "".to_string()).len()
55+
) +
56+
self.cpe.iter().fold(0, |acc, cpe|
57+
// use the json string length as an approximation of the memory size
58+
acc + serde_json::to_string(cpe).unwrap_or_else(|_| "".to_string()).len()
59+
) +
60+
self.name.len() +
61+
self.version.len() +
62+
self.published.len() +
63+
self.document_id.len() +
64+
self.product_name.len() +
65+
self.product_version.len();
66+
67+
PackageNode {
68+
approximate_memory_size,
69+
..self.clone()
70+
}
71+
}
72+
}
73+
3974
impl fmt::Display for PackageNode {
4075
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4176
write!(f, "{:?}", self.purl)
@@ -151,22 +186,38 @@ impl DerefMut for DepSummary {
151186
}
152187
}
153188

154-
#[derive(Debug)]
155189
pub struct GraphMap {
156-
map: HashMap<String, Graph<PackageNode, Relationship, petgraph::Directed>>,
190+
map: CLruCache<String, Graph<PackageNode, Relationship, petgraph::Directed>, RandomState, GraphMapWeightScale>,
157191
}
158192

193+
struct GraphMapWeightScale;
194+
195+
impl WeightScale<String, Graph<PackageNode, Relationship, petgraph::Directed>> for GraphMapWeightScale {
196+
fn weight(&self, key: &String, value: &Graph<PackageNode, Relationship, petgraph::Directed>) -> usize {
197+
let mut result = key.len();
198+
for n in value.raw_nodes() {
199+
result + n.weight.approximate_memory_size;
200+
}
201+
result = value.raw_edges().len() * size_of::<Edge<Relationship>>();
202+
result
203+
}
204+
}
205+
206+
159207
impl GraphMap {
160208
// Create a new instance of GraphMap
161-
pub fn new() -> Self {
209+
pub fn new(cap: NonZeroUsize) -> Self {
210+
let x = CLruCache::with_config(
211+
CLruCacheConfig::new(cap).with_scale(GraphMapWeightScale {}),
212+
);
162213
GraphMap {
163-
map: HashMap::new(),
214+
map: x,
164215
}
165216
}
166217

167218
// Check if the map contains a key
168219
pub fn contains_key(&self, key: &str) -> bool {
169-
self.map.contains_key(key)
220+
self.map.contains(key)
170221
}
171222

172223
// Get the number of graphs in the map
@@ -185,27 +236,22 @@ impl GraphMap {
185236
key: String,
186237
graph: Graph<PackageNode, Relationship, petgraph::Directed>,
187238
) {
188-
self.map.insert(key, graph);
239+
self.map.put_with_weight(key, graph);
189240
}
190241

191242
// Retrieve a reference to a graph by its key (read access)
192-
pub fn get(&self, key: &str) -> Option<&Graph<PackageNode, Relationship, petgraph::Directed>> {
243+
pub fn get(&mut self, key: &str) -> Option<&Graph<PackageNode, Relationship, petgraph::Directed>> {
193244
self.map.get(key)
194245
}
195246

196-
// Retrieve all sbom ids(read access)
197-
pub fn sbom_ids(&self) -> Vec<String> {
198-
self.map.keys().cloned().collect()
199-
}
200-
201247
// Clear all graphs from the map
202248
pub fn clear(&mut self) {
203249
self.map.clear();
204250
}
205251
}
206252

207-
impl Default for GraphMap {
208-
fn default() -> Self {
209-
Self::new()
210-
}
211-
}
253+
// impl Default for GraphMap {
254+
// fn default() -> Self {
255+
// Self::new()
256+
// }
257+
// }

modules/analysis/src/service/load.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,8 @@ impl AnalysisService {
261261
document_id: package.document_id.clone().unwrap_or_default(),
262262
product_name: package.product_name.clone().unwrap_or_default(),
263263
product_version: package.product_version.clone().unwrap_or_default(),
264-
});
264+
approximate_memory_size: 0,
265+
}.set_approximate_memory_size());
265266

266267
log::debug!("Inserting - id: {}, index: {index:?}", entry.key());
267268

modules/analysis/src/service/mod.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use std::{
2727
fmt::Debug,
2828
sync::Arc,
2929
};
30+
use std::num::NonZeroUsize;
3031
use tracing::instrument;
3132
use trustify_common::{
3233
db::query::Value,
@@ -154,9 +155,9 @@ impl AnalysisService {
154155
/// Also, we do not implement default because of this. As a new instance has the implication
155156
/// of having its own cache. So creating a new instance should be a deliberate choice.
156157
#[allow(clippy::new_without_default)]
157-
pub fn new() -> Self {
158+
pub fn new(cap: NonZeroUsize) -> Self {
158159
Self {
159-
graph: Default::default(),
160+
graph: Arc::new(RwLock::new(GraphMap::new(cap))),
160161
}
161162
}
162163

0 commit comments

Comments
 (0)