From 0090c83604d0513138a5f1ef8a68a691ee291e36 Mon Sep 17 00:00:00 2001 From: Thibault Normand Date: Wed, 11 Dec 2024 11:26:27 +0100 Subject: [PATCH 1/9] feat(graph): register composite indices. --- .../kubehound/graph/kubehound-db-init.groovy | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/deployments/kubehound/graph/kubehound-db-init.groovy b/deployments/kubehound/graph/kubehound-db-init.groovy index a4753ed15..7147f456d 100644 --- a/deployments/kubehound/graph/kubehound-db-init.groovy +++ b/deployments/kubehound/graph/kubehound-db-init.groovy @@ -179,6 +179,16 @@ mgmt.buildIndex('byServiceEndpoint', Vertex.class).addKey(serviceEndpoint).build mgmt.buildIndex('byServiceDns', Vertex.class).addKey(serviceDns).buildCompositeIndex(); mgmt.buildIndex('byExposure', Vertex.class).addKey(exposure).buildCompositeIndex(); +// Create composite indices for the properties we want to search on +mgmt.buildIndex('byClusterAndRunIDComposite', Vertex.class).addKey(cluster).addKey(runID).buildCompositeIndex(); +mgmt.buildIndex('byClassAndRunIDComposite', Vertex.class).addKey(cls).addKey(runID).buildCompositeIndex(); +mgmt.buildIndex('byClassAndClusterComposite', Vertex.class).addKey(cls).addKey(cluster).buildCompositeIndex(); +mgmt.buildIndex('byClassAndTypeComposite', Vertex.class).addKey(cls).addKey(type).buildCompositeIndex(); +mgmt.buildIndex('byClassAndExposureComposite', Vertex.class).addKey(cls).addKey(exposure).buildCompositeIndex(); +mgmt.buildIndex('byTypeAndNameComposite', Vertex.class).addKey(type).addKey(name).buildCompositeIndex(); +mgmt.buildIndex('byImageAndRunIDComposite', Vertex.class).addKey(image).addKey(runID).buildCompositeIndex(); +mgmt.buildIndex('byAppAndRunIDComposite', Vertex.class).addKey(app).addKey(runID).buildCompositeIndex(); +mgmt.buildIndex('byNamespaceAndRunIDComposite', Vertex.class).addKey(namespace).addKey(runID).buildCompositeIndex(); mgmt.commit(); @@ -194,9 +204,24 @@ ManagementSystem.awaitGraphIndexStatus(graph, 'byName').status(SchemaStatus.ENAB ManagementSystem.awaitGraphIndexStatus(graph, 'byNamespace').status(SchemaStatus.ENABLED).call(); ManagementSystem.awaitGraphIndexStatus(graph, 'byType').status(SchemaStatus.ENABLED).call(); ManagementSystem.awaitGraphIndexStatus(graph, 'byCritical').status(SchemaStatus.ENABLED).call(); +ManagementSystem.awaitGraphIndexStatus(graph, 'byPort').status(SchemaStatus.ENABLED).call(); +ManagementSystem.awaitGraphIndexStatus(graph, 'byPortName').status(SchemaStatus.ENABLED).call(); +ManagementSystem.awaitGraphIndexStatus(graph, 'byServiceEndpoint').status(SchemaStatus.ENABLED).call(); +ManagementSystem.awaitGraphIndexStatus(graph, 'byServiceDns').status(SchemaStatus.ENABLED).call(); +ManagementSystem.awaitGraphIndexStatus(graph, 'byExposure').status(SchemaStatus.ENABLED).call(); + +ManagementSystem.awaitGraphIndexStatus(graph, 'byClusterAndRunIDComposite').status(SchemaStatus.ENABLED).call(); +ManagementSystem.awaitGraphIndexStatus(graph, 'byClassAndRunIDComposite').status(SchemaStatus.ENABLED).call(); +ManagementSystem.awaitGraphIndexStatus(graph, 'byClassAndClusterComposite').status(SchemaStatus.ENABLED).call(); +ManagementSystem.awaitGraphIndexStatus(graph, 'byClassAndTypeComposite').status(SchemaStatus.ENABLED).call(); +ManagementSystem.awaitGraphIndexStatus(graph, 'byClassAndExposureComposite').status(SchemaStatus.ENABLED).call(); +ManagementSystem.awaitGraphIndexStatus(graph, 'byTypeAndNameComposite').status(SchemaStatus.ENABLED).call(); +ManagementSystem.awaitGraphIndexStatus(graph, 'byImageAndRunIDComposite').status(SchemaStatus.ENABLED).call(); +ManagementSystem.awaitGraphIndexStatus(graph, 'byAppAndRunIDComposite').status(SchemaStatus.ENABLED).call(); +ManagementSystem.awaitGraphIndexStatus(graph, 'byNamespaceAndRunIDComposite').status(SchemaStatus.ENABLED).call(); System.out.println("[KUBEHOUND] graph schema and indexes ready"); mgmt.close(); // Close the open connection -:remote close \ No newline at end of file +:remote close From 9110e5c313d9f9530c863d97d1da17c1ab6f2c30 Mon Sep 17 00:00:00 2001 From: Thibault Normand Date: Wed, 11 Dec 2024 11:26:55 +0100 Subject: [PATCH 2/9] feat(graph): production-grade tweaking. --- deployments/kubehound/graph/Dockerfile | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/deployments/kubehound/graph/Dockerfile b/deployments/kubehound/graph/Dockerfile index f3e0a879f..15acf3aec 100644 --- a/deployments/kubehound/graph/Dockerfile +++ b/deployments/kubehound/graph/Dockerfile @@ -7,7 +7,7 @@ COPY dsl/kubehound/pom.xml /home/app RUN mvn -f /home/app/pom.xml clean install # Now build our janusgraph wrapper container with KubeHound customizations -FROM janusgraph/janusgraph:1.0.0 +FROM janusgraph/janusgraph:1.1.0 LABEL org.opencontainers.image.source="https://github.com/DataDog/kubehound/" # Add our initialization script for the database schema to the startup directory @@ -59,9 +59,22 @@ ENV gremlinserver.metrics.slf4jReporter.enabled=false ENV gremlinserver.metrics.graphiteReporter.enabled=false ENV gremlinserver.metrics.csvReporter.enabled=false +# Add safety net settings to prevent OOM and other issues +ENV janusgraph.query.force-index=true +ENV janusgraph.cluster.max-partitions=512 +ENV janusgraph.query.batch=true +ENV janusgraph.query.hard-max-limit=10000 +ENV janusgraph.query.smart-limit=true + +# Enable cache +ENV janusgraph.cache.db-cache=true +ENV janusgraph.cache.db-cache-clean-wait=20 +ENV janusgraph.cache.db-cache-time=180000 +ENV janusgraph.cache.db-cache-size=0.5 + # Performance tweaks based on: https://www.sailpoint.com/blog/souping-up-the-gremlin/ # gremlinPool will default to Runtime.availableProcessors() -ENV gremlinserver.gremlinPool=0 +ENV gremlinserver.gremlinPool=8 # threadPoolWorker should be 2x VCPU (TODO: can we set dynamically?) ENV gremlinserver.threadPoolWorker=16 From 80ab64a45c9c1d4256eac7f4a90f48633c2e5192 Mon Sep 17 00:00:00 2001 From: Thibault Normand Date: Wed, 11 Dec 2024 11:27:26 +0100 Subject: [PATCH 3/9] feat(graph): use index for graph deletion. --- .../storage/graphdb/janusgraph_provider.go | 99 +++++++++++-------- 1 file changed, 56 insertions(+), 43 deletions(-) diff --git a/pkg/kubehound/storage/graphdb/janusgraph_provider.go b/pkg/kubehound/storage/graphdb/janusgraph_provider.go index 643def25e..f07fe1eb1 100644 --- a/pkg/kubehound/storage/graphdb/janusgraph_provider.go +++ b/pkg/kubehound/storage/graphdb/janusgraph_provider.go @@ -67,50 +67,63 @@ func (jgp *JanusGraphProvider) Prepare(ctx context.Context) error { return nil } - g := gremlin.Traversal_().WithRemote(jgp.drc) - tx := g.Tx() - defer tx.Close() - - for { - // Begin a new transaction. - gtx, err := tx.Begin() - if err != nil { - return err - } - - // Retrieve the number of vertices in the graph. - page, err := gtx.V().Count().Next() - if err != nil { - return err - } - - // Decode the number of vertices from the page. - count, err := page.GetInt() - if err != nil { - return err - } - - // If there are no more vertices to delete, break the loop. - if count == 0 { - break - } - - // Delete the vertices in the graph. - err = <-gtx.V().Limit(deleteBatchSize).Drop().Iterate() - if err != nil { - return err - } - - // Commit the transaction. - if err := tx.Commit(); err != nil { - return err - } + // These vertex types are defined in the schema. + vertexTypes := []string{ + "Container", + "Identity", + "Node", + "Pod", + "PermissionSet", + "Volume", + "Endpoint", + } - // Check context for cancellation. - select { - case <-ctx.Done(): - return ctx.Err() - default: + for _, vertexType := range vertexTypes { + g := gremlin.Traversal_().WithRemote(jgp.drc) + tx := g.Tx() + defer tx.Close() + + for { + // Begin a new transaction. + gtx, err := tx.Begin() + if err != nil { + return err + } + + // Retrieve the number of vertices in the graph. + page, err := gtx.V().Has("class", vertexType).Count().Next() + if err != nil { + return err + } + + // Decode the number of vertices from the page. + count, err := page.GetInt() + if err != nil { + return err + } + + // If there are no more vertices to delete, break the loop. + if count == 0 { + break + } + + // Delete the vertices in the graph. + err = <-gtx.V().Has("class", vertexType).Limit(deleteBatchSize).Drop().Iterate() + if err != nil { + return err + } + + // Commit the transaction. + if err := tx.Commit(); err != nil { + return err + } + + // Check context for cancellation. + select { + case <-ctx.Done(): + return ctx.Err() + default: + } } } From 92ee53ec0ad6b2118373dc3f8bfaaaa31ed0a057 Mon Sep 17 00:00:00 2001 From: Thibault Normand Date: Wed, 11 Dec 2024 11:27:58 +0100 Subject: [PATCH 4/9] feat(graph): optimise path traversal. --- .../ase/kubehound/KubeHoundTraversalDsl.java | 8 +-- .../ui/LowHangingFruit-ContainerEscape.ipynb | 66 +++++++++---------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/deployments/kubehound/graph/dsl/kubehound/src/main/java/com/datadog/ase/kubehound/KubeHoundTraversalDsl.java b/deployments/kubehound/graph/dsl/kubehound/src/main/java/com/datadog/ase/kubehound/KubeHoundTraversalDsl.java index 05d998c93..fe702b3eb 100644 --- a/deployments/kubehound/graph/dsl/kubehound/src/main/java/com/datadog/ase/kubehound/KubeHoundTraversalDsl.java +++ b/deployments/kubehound/graph/dsl/kubehound/src/main/java/com/datadog/ase/kubehound/KubeHoundTraversalDsl.java @@ -90,7 +90,7 @@ public default GraphTraversal criticalPaths(int maxHops) { return repeat(((KubeHoundTraversalDsl) __.outE()) .inV() - .simplePath()).until( + .simplePath().dedup()).until( __.has("critical", true) .or() .loops() @@ -126,7 +126,7 @@ public default GraphTraversal criticalPathsFilter(int maxHops, String.. return repeat(((KubeHoundTraversalDsl) __.outE()) .has("class", P.not(P.within(exclusions))) .inV() - .simplePath()).until( + .simplePath().dedup()).until( __.has("critical", true) .or() .loops() @@ -167,7 +167,7 @@ public default GraphTraversal minHopsToCritical(i throw new IllegalArgumentException(String.format("maxHops must be <= %d", PATH_HOPS_MAX)); return repeat(((KubeHoundTraversalDsl) __.out()) - .simplePath()).until( + .simplePath().dedup()).until( __.has("critical", true) .or() .loops() @@ -202,7 +202,7 @@ public default GraphTraversal> criticalPathsFreq(int maxHops return repeat( (KubeHoundTraversalDsl) __.outE() .inV() - .simplePath()) + .simplePath().dedup()) .emit() .until( __.has("critical", true) diff --git a/deployments/kubehound/ui/LowHangingFruit-ContainerEscape.ipynb b/deployments/kubehound/ui/LowHangingFruit-ContainerEscape.ipynb index 8b1736536..67f66c390 100644 --- a/deployments/kubehound/ui/LowHangingFruit-ContainerEscape.ipynb +++ b/deployments/kubehound/ui/LowHangingFruit-ContainerEscape.ipynb @@ -158,11 +158,11 @@ " .has(\"runID\", graph.variables().get('runID_yourid').get().trim())\n", " .where(\n", " repeat(\n", - " outE().inV().simplePath() // Building the path from one vertex to another\n", + " outE().inV().simplePath().dedup() // Building the path from one vertex to another\n", " ).until(\n", - " has(label, \"Node\") // Stop when meeting a critical asset\n", - " .or().loops().is(10) // Stop after X iteration\n", - " ).has(label, \"Node\") // Keep only path ending with a critical asset\n", + " has(\"class\", \"Node\") // Stop when meeting a critical asset\n", + " .or().loops().is(10) // Stop after X iteration\n", + " ).has(\"class\", \"Node\") // Keep only path ending with a critical asset\n", " .limit(1)\n", " )\n", " .dedup().by(\"image\")\n", @@ -201,11 +201,11 @@ " .has(\"runID\", graph.variables().get('runID_yourid').get().trim())\n", " .where(\n", " repeat(\n", - " outE().inV().simplePath() // Building the path from one vertex to another\n", + " outE().inV().simplePath().dedup() // Building the path from one vertex to another\n", " ).until(\n", - " has(label, \"Node\") // Stop when meeting a critical asset\n", - " .or().loops().is(10) // Stop after X iteration\n", - " ).has(label, \"Node\") // Keep only path ending with a critical asset\n", + " has(\"class\", \"Node\") // Stop when meeting a critical asset\n", + " .or().loops().is(10) // Stop after X iteration\n", + " ).has(\"class\", \"Node\") // Keep only path ending with a critical asset\n", " .limit(1)\n", " )\n", " .dedup()\n", @@ -313,11 +313,11 @@ " .has(\"runID\", graph.variables().get('runID_yourid').get().trim())\n", " .has(\"app\",graph.variables().get('containerEscape_vulnApp_yourid').get().trim())\n", " .repeat(\n", - " outE().inV().simplePath() // Building the path from one vertex to another\n", + " outE().inV().simplePath().dedup() // Building the path from one vertex to another\n", " ).until(\n", - " has(label, \"Node\") // Stop when meeting a critical asset\n", - " .or().loops().is(10) // Stop after X iteration\n", - " ).has(label, \"Node\") // Keep only path ending with a critical asset\n", + " has(\"class\", \"Node\") // Stop when meeting a critical asset\n", + " .or().loops().is(10) // Stop after X iteration\n", + " ).has(\"class\", \"Node\") // Keep only path ending with a critical asset\n", " .path().by(elementMap())\n", " .limit(1000)" ] @@ -360,11 +360,11 @@ " .has(\"runID\", graph.variables().get('runID_yourid').get().trim())\n", " .has(\"app\",graph.variables().get('containerEscape_vulnApp_yourid').get().trim())\n", " .repeat(\n", - " outE().inV().simplePath() // Building the path from one vertex to another\n", + " outE().inV().simplePath().dedup() // Building the path from one vertex to another\n", " ).until(\n", - " has(label, \"Node\") // Stop when meeting a critical asset\n", - " .or().loops().is(10) // Stop after X iteration\n", - " ).has(label, \"Node\") // Keep only path ending with a critical asset\n", + " has(\"class\", \"Node\") // Stop when meeting a critical asset\n", + " .or().loops().is(10) // Stop after X iteration\n", + " ).has(\"class\", \"Node\") // Keep only path ending with a critical asset\n", " .path()\n", " .by(valueMap(\"app\", \"class\",\"critical\").with(WithOptions.tokens,WithOptions.labels))\n", " .dedup()\n", @@ -402,11 +402,11 @@ " .has(\"runID\", graph.variables().get('runID_yourid').get().trim())\n", " .has(\"app\",graph.variables().get('containerEscape_vulnApp_yourid').get().trim())\n", " .repeat(\n", - " outE().inV().simplePath() // Building the path from one vertex to another\n", + " outE().inV().simplePath().dedup() // Building the path from one vertex to another\n", " ).until(\n", - " has(label, \"Node\") // Stop when meeting a critical asset\n", - " .or().loops().is(10) // Stop after X iteration\n", - " ).has(label, \"Node\") // Keep only path ending with a critical asset\n", + " has(\"class\", \"Node\") // Stop when meeting a critical asset\n", + " .or().loops().is(10) // Stop after X iteration\n", + " ).has(\"class\", \"Node\") // Keep only path ending with a critical asset\n", " .path().by(label())\n", " .dedup()\n", " .limit(1000)" @@ -438,11 +438,11 @@ " .or().has(\"namespace\", within(graph.variables().get('containerEscape_whiteListedNamespace_yourid').get()))\n", " )\n", " .repeat(\n", - " outE().inV().simplePath() // Building the path from one vertex to another\n", + " outE().inV().simplePath().dedup() // Building the path from one vertex to another\n", " ).until(\n", - " has(label, \"Node\") // Stop when meeting a critical asset\n", - " .or().loops().is(10) // Stop after X iteration\n", - " ).has(label, \"Node\") // Keep only path ending with a critical asset\n", + " has(\"class\", \"Node\") // Stop when meeting a critical asset\n", + " .or().loops().is(10) // Stop after X iteration\n", + " ).has(\"class\", \"Node\") // Keep only path ending with a critical asset\n", " .path().by(elementMap())\n", " .limit(1000)" ] @@ -469,11 +469,11 @@ " .or().has(\"namespace\", within(graph.variables().get('containerEscape_whiteListedNamespace_yourid').get()))\n", " )\n", " .repeat(\n", - " outE().inV().simplePath() // Building the path from one vertex to another\n", + " outE().inV().simplePath().dedup() // Building the path from one vertex to another\n", " ).until(\n", - " has(label, \"Node\") // Stop when meeting a critical asset\n", - " .or().loops().is(10) // Stop after X iteration\n", - " ).has(label, \"Node\") // Keep only path ending with a critical asset\n", + " has(\"class\", \"Node\") // Stop when meeting a critical asset\n", + " .or().loops().is(10) // Stop after X iteration\n", + " ).has(\"class\", \"Node\") // Keep only path ending with a critical asset\n", " .path()\n", " .by(valueMap(\"app\", \"class\",\"critical\").with(WithOptions.tokens,WithOptions.labels))\n", " .dedup()\n", @@ -502,12 +502,12 @@ " .or().has(\"namespace\", within(graph.variables().get('containerEscape_whiteListedNamespace_yourid').get()))\n", " )\n", " .repeat(\n", - " outE().inV().simplePath() // Building the path from one vertex to another\n", + " outE().inV().simplePath().dedup() // Building the path from one vertex to another\n", " ).until(\n", - " has(label, \"Node\") // Stop when meeting a critical asset\n", - " .or().loops().is(10) // Stop after X iteration\n", - " ).has(label, \"Node\") // Keep only path ending with a critical asset\n", - " .path().by(label())\n", + " has(\"class\", \"Node\") // Stop when meeting a critical asset\n", + " .or().loops().is(10) // Stop after X iteration\n", + " ).has(\"class\", \"Node\") // Keep only path ending with a critical asset\n", + " .path().by(\"class\")\n", " .dedup()\n", " .limit(1000)" ] From fae1eb0d8ac3f62c88b87cda777ca29dfab336a6 Mon Sep 17 00:00:00 2001 From: Thibault Normand Date: Wed, 11 Dec 2024 12:04:08 +0100 Subject: [PATCH 5/9] chore(test): fix system tests. --- pkg/kubehound/graph/vertex/types.go | 14 +++++++++ .../storage/graphdb/janusgraph_provider.go | 12 +------- test/system/graph_dsl_test.go | 3 +- test/system/graph_edge_test.go | 2 +- test/system/graph_vertex_test.go | 30 +++++++++++-------- 5 files changed, 35 insertions(+), 26 deletions(-) create mode 100644 pkg/kubehound/graph/vertex/types.go diff --git a/pkg/kubehound/graph/vertex/types.go b/pkg/kubehound/graph/vertex/types.go new file mode 100644 index 000000000..4c9413a07 --- /dev/null +++ b/pkg/kubehound/graph/vertex/types.go @@ -0,0 +1,14 @@ +package vertex + +var ( + // Labels is a list of all possible labels for a vertex in the graph. + Labels = []string{ + ContainerLabel, + EndpointLabel, + IdentityLabel, + NodeLabel, + PermissionSetLabel, + PodLabel, + VolumeLabel, + } +) diff --git a/pkg/kubehound/storage/graphdb/janusgraph_provider.go b/pkg/kubehound/storage/graphdb/janusgraph_provider.go index f07fe1eb1..b09359a46 100644 --- a/pkg/kubehound/storage/graphdb/janusgraph_provider.go +++ b/pkg/kubehound/storage/graphdb/janusgraph_provider.go @@ -68,17 +68,7 @@ func (jgp *JanusGraphProvider) Prepare(ctx context.Context) error { } // These vertex types are defined in the schema. - vertexTypes := []string{ - "Container", - "Identity", - "Node", - "Pod", - "PermissionSet", - "Volume", - "Endpoint", - } - - for _, vertexType := range vertexTypes { + for _, vertexType := range vertex.Labels { g := gremlin.Traversal_().WithRemote(jgp.drc) tx := g.Tx() defer tx.Close() diff --git a/test/system/graph_dsl_test.go b/test/system/graph_dsl_test.go index e39513c3f..f6978a333 100644 --- a/test/system/graph_dsl_test.go +++ b/test/system/graph_dsl_test.go @@ -259,7 +259,6 @@ func (suite *DslTestSuite) TestTraversal_criticalPaths() { // There are A LOT of paths in the test cluster. Just sample a few expected := []string{ "path[Endpoint, ENDPOINT_EXPLOIT, Container, IDENTITY_ASSUME, Identity, PERMISSION_DISCOVER, PermissionSet]", - "path[Endpoint, ENDPOINT_EXPLOIT, Container, VOLUME_DISCOVER, Volume, TOKEN_STEAL, Identity, PERMISSION_DISCOVER, PermissionSet]", "path[Endpoint, ENDPOINT_EXPLOIT, Container, CE_NSENTER, Node, IDENTITY_ASSUME, Identity, PERMISSION_DISCOVER, PermissionSet]", "path[Endpoint, ENDPOINT_EXPLOIT, Container, CE_MODULE_LOAD, Node, IDENTITY_ASSUME, Identity, PERMISSION_DISCOVER, PermissionSet]", "path[Endpoint, ENDPOINT_EXPLOIT, Container, CE_PRIV_MOUNT, Node, IDENTITY_ASSUME, Identity, PERMISSION_DISCOVER, PermissionSet]", @@ -283,7 +282,7 @@ func (suite *DslTestSuite) TestTraversal_minHopsToCritical() { serviceHops, err := res.GetInt() suite.NoError(err) - suite.Equal(serviceHops, 4) + suite.Equal(serviceHops, 3) // Container should have 1 less hop raw, err = suite.client.Submit("kh.containers().minHopsToCritical(6)") diff --git a/test/system/graph_edge_test.go b/test/system/graph_edge_test.go index 57df54e21..0298ff441 100644 --- a/test/system/graph_edge_test.go +++ b/test/system/graph_edge_test.go @@ -234,7 +234,7 @@ func (suite *EdgeTestSuite) TestEdge_IDENTITY_ASSUME_Node() { func (suite *EdgeTestSuite) TestEdge_POD_ATTACH() { // Every pod should have a POD_ATTACH incoming from a node rawCount, err := suite.g.V(). - HasLabel("Pod"). + Has("class", "Pod"). Count().Next() suite.NoError(err) diff --git a/test/system/graph_vertex_test.go b/test/system/graph_vertex_test.go index 20eaf2606..d8b5f82ff 100644 --- a/test/system/graph_vertex_test.go +++ b/test/system/graph_vertex_test.go @@ -356,20 +356,26 @@ func (suite *VertexTestSuite) TestVertexIdentity() { func (suite *VertexTestSuite) TestVertexClusterProperty() { // All vertices should have the cluster property set - results, err := suite.g.V(). - Values("cluster"). - Dedup(). - ToList() - - suite.NoError(err) - suite.GreaterOrEqual(len(results), 1) - - present := suite.resultsToStringArray(results) - expected := []string{ - "kind-kubehound.test.local", + for _, label := range vertex.Labels { + suite.Run(label, func() { + results, err := suite.g.V(). + Has("class", label). + Values("cluster"). + Dedup(). + ToList() + + suite.NoError(err) + suite.GreaterOrEqual(len(results), 1) + + present := suite.resultsToStringArray(results) + expected := []string{ + "kind-kubehound.test.local", + } + + suite.Subset(present, expected) + }) } - suite.Subset(present, expected) } func (suite *VertexTestSuite) TearDownSuite() { From f653c899b7e046d2cceff9e3243ea53aa6e07d49 Mon Sep 17 00:00:00 2001 From: Thibault Normand Date: Wed, 11 Dec 2024 12:22:53 +0100 Subject: [PATCH 6/9] feat(docker): JVM in docker options. --- deployments/kubehound/graph/Dockerfile | 2 +- deployments/kubehound/graph/conf/jvm.options | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/deployments/kubehound/graph/Dockerfile b/deployments/kubehound/graph/Dockerfile index 15acf3aec..14a04e348 100644 --- a/deployments/kubehound/graph/Dockerfile +++ b/deployments/kubehound/graph/Dockerfile @@ -74,7 +74,7 @@ ENV janusgraph.cache.db-cache-size=0.5 # Performance tweaks based on: https://www.sailpoint.com/blog/souping-up-the-gremlin/ # gremlinPool will default to Runtime.availableProcessors() -ENV gremlinserver.gremlinPool=8 +ENV gremlinserver.gremlinPool=0 # threadPoolWorker should be 2x VCPU (TODO: can we set dynamically?) ENV gremlinserver.threadPoolWorker=16 diff --git a/deployments/kubehound/graph/conf/jvm.options b/deployments/kubehound/graph/conf/jvm.options index 68454d4cf..5116176b4 100644 --- a/deployments/kubehound/graph/conf/jvm.options +++ b/deployments/kubehound/graph/conf/jvm.options @@ -63,7 +63,8 @@ ################# -XX:+UseG1GC +-XX:+UseContainerSupport -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=1 --javaagent:"/opt/janusgraph/lib/jmx_prometheus_javaagent-0.18.0.jar"=8099:/opt/janusgraph/lib/exporter-config.yaml \ No newline at end of file +-javaagent:"/opt/janusgraph/lib/jmx_prometheus_javaagent-0.18.0.jar"=8099:/opt/janusgraph/lib/exporter-config.yaml From 0671711a03e6232736f6583f6aac695242b4d375 Mon Sep 17 00:00:00 2001 From: Thibault Normand Date: Wed, 11 Dec 2024 14:19:11 +0100 Subject: [PATCH 7/9] feat(graph): revert dedup from DSL. --- deployments/kubehound/graph/Dockerfile | 6 ------ .../com/datadog/ase/kubehound/KubeHoundTraversalDsl.java | 8 ++++---- test/system/graph_dsl_test.go | 4 ++-- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/deployments/kubehound/graph/Dockerfile b/deployments/kubehound/graph/Dockerfile index 14a04e348..352a85ffc 100644 --- a/deployments/kubehound/graph/Dockerfile +++ b/deployments/kubehound/graph/Dockerfile @@ -66,12 +66,6 @@ ENV janusgraph.query.batch=true ENV janusgraph.query.hard-max-limit=10000 ENV janusgraph.query.smart-limit=true -# Enable cache -ENV janusgraph.cache.db-cache=true -ENV janusgraph.cache.db-cache-clean-wait=20 -ENV janusgraph.cache.db-cache-time=180000 -ENV janusgraph.cache.db-cache-size=0.5 - # Performance tweaks based on: https://www.sailpoint.com/blog/souping-up-the-gremlin/ # gremlinPool will default to Runtime.availableProcessors() ENV gremlinserver.gremlinPool=0 diff --git a/deployments/kubehound/graph/dsl/kubehound/src/main/java/com/datadog/ase/kubehound/KubeHoundTraversalDsl.java b/deployments/kubehound/graph/dsl/kubehound/src/main/java/com/datadog/ase/kubehound/KubeHoundTraversalDsl.java index fe702b3eb..05d998c93 100644 --- a/deployments/kubehound/graph/dsl/kubehound/src/main/java/com/datadog/ase/kubehound/KubeHoundTraversalDsl.java +++ b/deployments/kubehound/graph/dsl/kubehound/src/main/java/com/datadog/ase/kubehound/KubeHoundTraversalDsl.java @@ -90,7 +90,7 @@ public default GraphTraversal criticalPaths(int maxHops) { return repeat(((KubeHoundTraversalDsl) __.outE()) .inV() - .simplePath().dedup()).until( + .simplePath()).until( __.has("critical", true) .or() .loops() @@ -126,7 +126,7 @@ public default GraphTraversal criticalPathsFilter(int maxHops, String.. return repeat(((KubeHoundTraversalDsl) __.outE()) .has("class", P.not(P.within(exclusions))) .inV() - .simplePath().dedup()).until( + .simplePath()).until( __.has("critical", true) .or() .loops() @@ -167,7 +167,7 @@ public default GraphTraversal minHopsToCritical(i throw new IllegalArgumentException(String.format("maxHops must be <= %d", PATH_HOPS_MAX)); return repeat(((KubeHoundTraversalDsl) __.out()) - .simplePath().dedup()).until( + .simplePath()).until( __.has("critical", true) .or() .loops() @@ -202,7 +202,7 @@ public default GraphTraversal> criticalPathsFreq(int maxHops return repeat( (KubeHoundTraversalDsl) __.outE() .inV() - .simplePath().dedup()) + .simplePath()) .emit() .until( __.has("critical", true) diff --git a/test/system/graph_dsl_test.go b/test/system/graph_dsl_test.go index f6978a333..6d3d33b62 100644 --- a/test/system/graph_dsl_test.go +++ b/test/system/graph_dsl_test.go @@ -282,7 +282,7 @@ func (suite *DslTestSuite) TestTraversal_minHopsToCritical() { serviceHops, err := res.GetInt() suite.NoError(err) - suite.Equal(serviceHops, 3) + suite.Equal(5, serviceHops) // Container should have 1 less hop raw, err = suite.client.Submit("kh.containers().minHopsToCritical(6)") @@ -294,7 +294,7 @@ func (suite *DslTestSuite) TestTraversal_minHopsToCritical() { containerHops, err := res.GetInt() suite.NoError(err) - suite.Equal(containerHops, serviceHops-1) + suite.Equal(serviceHops-1, containerHops) } func (suite *DslTestSuite) TestTraversal_criticalPathsFilter() { From 8a3e920bd2b7c10fb88f86f1dce00d8693eef89f Mon Sep 17 00:00:00 2001 From: Thibault Normand Date: Wed, 11 Dec 2024 14:25:29 +0100 Subject: [PATCH 8/9] fix(ci): restroe test values. --- test/system/graph_dsl_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/system/graph_dsl_test.go b/test/system/graph_dsl_test.go index 6d3d33b62..1ca7a4ce6 100644 --- a/test/system/graph_dsl_test.go +++ b/test/system/graph_dsl_test.go @@ -282,7 +282,7 @@ func (suite *DslTestSuite) TestTraversal_minHopsToCritical() { serviceHops, err := res.GetInt() suite.NoError(err) - suite.Equal(5, serviceHops) + suite.Equal(4, serviceHops) // Container should have 1 less hop raw, err = suite.client.Submit("kh.containers().minHopsToCritical(6)") From efa60ec33e4cd73e88d2556849cdfa375c40de2e Mon Sep 17 00:00:00 2001 From: Thibault Normand Date: Wed, 11 Dec 2024 15:00:53 +0100 Subject: [PATCH 9/9] chore(graph): increase max-limit, and disable force-index. --- deployments/kubehound/graph/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployments/kubehound/graph/Dockerfile b/deployments/kubehound/graph/Dockerfile index 352a85ffc..a702473cb 100644 --- a/deployments/kubehound/graph/Dockerfile +++ b/deployments/kubehound/graph/Dockerfile @@ -60,10 +60,10 @@ ENV gremlinserver.metrics.graphiteReporter.enabled=false ENV gremlinserver.metrics.csvReporter.enabled=false # Add safety net settings to prevent OOM and other issues -ENV janusgraph.query.force-index=true +ENV janusgraph.query.force-index=false ENV janusgraph.cluster.max-partitions=512 ENV janusgraph.query.batch=true -ENV janusgraph.query.hard-max-limit=10000 +ENV janusgraph.query.hard-max-limit=100000 ENV janusgraph.query.smart-limit=true # Performance tweaks based on: https://www.sailpoint.com/blog/souping-up-the-gremlin/