Skip to content

Commit

Permalink
Use the DeepSizeOf trait to get an accurate account of Graph memory usage
Browse files Browse the repository at this point in the history
Signed-off-by: Hiram Chirino <[email protected]>
  • Loading branch information
chirino committed Feb 2, 2025
1 parent 5319ce7 commit a2b8e8e
Show file tree
Hide file tree
Showing 11 changed files with 69 additions and 58 deletions.
23 changes: 23 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ criterion = "0.5.1"
csaf = { version = "0.5.0", default-features = false }
csaf-walker = { version = "0.10.0", default-features = false }
cve = "0.3.1"
deepsize = "0.2.0"
env_logger = "0.11.0"
futures = "0.3.30"
futures-util = "0.3"
Expand Down Expand Up @@ -204,3 +205,6 @@ osv = { git = "https://github.com/ctron/osv", rev = "b53f1590bbbdc663e3efe405f1f
# to pickup fix: https://github.com/Abraxas-365/langchain-rust/pull/246
# and fix: https://github.com/Abraxas-365/langchain-rust/pull/250
langchain-rust = { git = "https://github.com/chirino/langchain-rust", branch = "main" }

# to pickup feat: https://github.com/Aeledfyr/deepsize/pull/41
deepsize = { git = "https://github.com/chirino/deepsize", branch = "main" }
1 change: 1 addition & 0 deletions common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ bytesize = { workspace = true, features = ["serde"] }
chrono = { workspace = true }
clap = { workspace = true, features = ["derive", "env"] }
cpe = { workspace = true }
deepsize = { workspace = true }
hex = { workspace = true }
human-date-parser = { workspace = true }
itertools = { workspace = true }
Expand Down
3 changes: 2 additions & 1 deletion common/src/cpe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use cpe::{
cpe::Cpe as _,
uri::{OwnedUri, Uri},
};
use deepsize::DeepSizeOf;
use serde::{
de::{Error, Visitor},
Deserialize, Deserializer, Serialize, Serializer,
Expand All @@ -17,7 +18,7 @@ use utoipa::{
};
use uuid::Uuid;

#[derive(Clone, Hash, Eq, PartialEq)]
#[derive(Clone, Hash, Eq, PartialEq, DeepSizeOf)]
pub struct Cpe {
uri: OwnedUri,
}
Expand Down
3 changes: 2 additions & 1 deletion common/src/purl.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use deepsize::DeepSizeOf;
use packageurl::PackageUrl;
use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
use serde::{
Expand Down Expand Up @@ -25,7 +26,7 @@ pub enum PurlErr {
Package(#[from] packageurl::Error),
}

#[derive(Clone, PartialEq, Eq, Hash)]
#[derive(Clone, PartialEq, Eq, Hash, DeepSizeOf)]
pub struct Purl {
pub ty: String,
pub namespace: Option<String>,
Expand Down
1 change: 1 addition & 0 deletions entity/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ trustify-cvss = { workspace = true }

async-graphql = { workspace = true, features = ["uuid", "time"] }
cpe = { workspace = true }
deepsize = { workspace = true }
schemars = { workspace = true }
sea-orm = { workspace = true, features = [
"sqlx-postgres",
Expand Down
2 changes: 2 additions & 0 deletions entity/src/relationship.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use deepsize::DeepSizeOf;
use sea_orm::{DeriveActiveEnum, EnumIter};
use std::fmt;

Expand All @@ -17,6 +18,7 @@ use std::fmt;
)]
#[sea_orm(rs_type = "i32", db_type = "Integer")]
#[serde(rename_all = "snake_case")]
#[derive(DeepSizeOf)]
// When adding a new variant, also add this to the "relationship" table.
pub enum Relationship {
#[sea_orm(num_value = 0)]
Expand Down
1 change: 1 addition & 0 deletions modules/analysis/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ actix-http = { workspace = true }
actix-web = { workspace = true }
anyhow = { workspace = true }
cpe = { workspace = true }
deepsize = { workspace = true, features = ["cpe", "petgraph"] }
log = { workspace = true }
moka = { workspace = true, features = ["sync"] }
parking_lot = { workspace = true }
Expand Down
52 changes: 16 additions & 36 deletions modules/analysis/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::{
ops::{Deref, DerefMut},
};

use deepsize::DeepSizeOf;
use moka::sync::Cache;
use std::sync::Arc;
use trustify_common::{cpe::Cpe, purl::Purl};
Expand All @@ -25,7 +26,7 @@ impl fmt::Display for AnalysisStatus {
}
}

#[derive(Debug, Clone, PartialEq, Eq, ToSchema, serde::Serialize)]
#[derive(Debug, Clone, PartialEq, Eq, ToSchema, serde::Serialize, DeepSizeOf)]
pub struct PackageNode {
pub sbom_id: String,
pub node_id: String,
Expand All @@ -37,33 +38,6 @@ pub struct PackageNode {
pub document_id: String,
pub product_name: String,
pub product_version: String,
pub approximate_memory_size: u32,
}

impl PackageNode {
/// Returns a clone of this node whose `approximate_memory_size` field holds a
/// rough estimate of the node's memory footprint.
///
/// Despite the `set_` prefix, this takes `&self` and does not mutate the
/// receiver; callers must use the returned value.
///
/// The estimate is `size_of::<PackageNode>()` plus the byte length of each
/// string field, plus the length of the JSON serialization of every entry
/// yielded by `self.purl.iter()` and `self.cpe.iter()`. An entry that fails to
/// serialize contributes 0. A total that does not fit in a `u32` is stored as
/// `u32::MAX`.
pub(crate) fn set_approximate_memory_size(&self) -> PackageNode {
// Is there a better way to do this?
let size = size_of::<PackageNode>()
+ self.sbom_id.len()
+ self.node_id.len()
+ self.purl.iter().fold(0, |acc, purl|
// use the json string length as an approximation of the memory size
acc + serde_json::to_string(purl).unwrap_or_else(|_| "".to_string()).len())
+ self.cpe.iter().fold(0, |acc, cpe|
// use the json string length as an approximation of the memory size
acc + serde_json::to_string(cpe).unwrap_or_else(|_| "".to_string()).len())
+ self.name.len()
+ self.version.len()
+ self.published.len()
+ self.document_id.len()
+ self.product_name.len()
+ self.product_version.len();

// Build the result from a full clone, overriding only the size field.
PackageNode {
approximate_memory_size: size.try_into().unwrap_or(u32::MAX),
..self.clone()
}
}
}

impl fmt::Display for PackageNode {
Expand Down Expand Up @@ -188,20 +162,26 @@ pub struct GraphMap {
}

#[allow(clippy::ptr_arg)] // &String is required by Cache::builder().weigher() method
fn weigher(key: &String, value: &Arc<PackageGraph>) -> u32 {
let mut result = key.len();
for n in value.raw_nodes() {
result += n.weight.approximate_memory_size as usize;
}
result += size_of_val(value.raw_edges());
result.try_into().unwrap_or(u32::MAX)
/// Computes the weight of one cache entry for the cache's weigher callback.
///
/// The weight is the `deep_size_of()` of the key plus the `deep_size_of()` of
/// the graph behind the `Arc`, plus a fixed allowance of 20 for per-entry cache
/// overhead. A total that does not fit in a `u32` is reported as `u32::MAX`.
fn size_of_graph_entry(key: &String, value: &Arc<PackageGraph>) -> u32 {
(
key.deep_size_of()
+ value.as_ref().deep_size_of()
// Also add in some entry overhead of the cache entry
+ 20
// todo: find a better estimate for the moka ValueEntry
)
// The weigher must return u32; saturate rather than fail on overflow.
.try_into()
.unwrap_or(u32::MAX)
}

impl GraphMap {
// Create a new instance of GraphMap
pub fn new(cap: u64) -> Self {
GraphMap {
map: Cache::builder().weigher(weigher).max_capacity(cap).build(),
map: Cache::builder()
.weigher(size_of_graph_entry)
.max_capacity(cap)
.build(),
}
}

Expand Down
28 changes: 12 additions & 16 deletions modules/analysis/src/service/load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,22 +254,18 @@ impl AnalysisService {

match nodes.entry(package.node_id.clone()) {
Entry::Vacant(entry) => {
let index = g.add_node(
PackageNode {
sbom_id: distinct_sbom_id.to_string(),
node_id: package.node_id,
purl: to_purls(package.purls),
cpe: to_cpes(package.cpes),
name: package.node_name,
version: package.node_version.clone().unwrap_or_default(),
published: package.published.clone(),
document_id: package.document_id.clone().unwrap_or_default(),
product_name: package.product_name.clone().unwrap_or_default(),
product_version: package.product_version.clone().unwrap_or_default(),
approximate_memory_size: 0,
}
.set_approximate_memory_size(),
);
let index = g.add_node(PackageNode {
sbom_id: distinct_sbom_id.to_string(),
node_id: package.node_id,
purl: to_purls(package.purls),
cpe: to_cpes(package.cpes),
name: package.node_name,
version: package.node_version.clone().unwrap_or_default(),
published: package.published.clone(),
document_id: package.document_id.clone().unwrap_or_default(),
product_name: package.product_name.clone().unwrap_or_default(),
product_version: package.product_version.clone().unwrap_or_default(),
});

log::debug!("Inserting - id: {}, index: {index:?}", entry.key());

Expand Down
9 changes: 5 additions & 4 deletions modules/analysis/src/service/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,18 +261,19 @@ async fn test_cache_size_used(ctx: &TrustifyContext) -> Result<(), anyhow::Error
let all_graphs = service.load_all_graphs(&ctx.db).await?;
assert_eq!(all_graphs.len(), 1);

// Does 3.4 KB sound right?
let kb = 1024;
let small_sbom_size = service.cache_size_used();
assert_eq!(small_sbom_size, 3505u64);
assert!(small_sbom_size > 6 * kb);
assert!(small_sbom_size < 7 * kb);

ctx.ingest_documents(["spdx/quarkus-bom-3.2.11.Final-redhat-00001.json"])
.await?;
let all_graphs = service.load_all_graphs(&ctx.db).await?;
assert_eq!(all_graphs.len(), 2);

// Does 676.7 KB sound right?
let big_sbom_size = service.cache_size_used() - small_sbom_size;
assert_eq!(big_sbom_size, 693006u64);
assert!(big_sbom_size > 950 * kb);
assert!(big_sbom_size < 960 * kb);

// Now lets try it with small cache that can at least fit the small bom
let service = AnalysisService::new_sized(small_sbom_size * 2);
Expand Down

0 comments on commit a2b8e8e

Please sign in to comment.