From 3ddcddae37ac62e38202ff3fb3510ef5bd33cfba Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Mon, 15 Sep 2025 06:35:52 -0700 Subject: [PATCH 01/20] Fix issue with CALL/YIELD for user defined and qualified functions. (#2217) Fixed 2 issues with CALL/YIELD - 1) If a user defined function was in search_path, the transform_FuncCall logic would only find it, if it were part of an extension. 2) If a function were qualified, the transform_cypher_call_subquery logic would mistakenly extract the schema name instead of the function name. NOTE: transform_FuncCall should be reviewed for possible refactor. Added regression tests. modified: src/backend/parser/cypher_clause.c modified: src/backend/parser/cypher_expr.c modified: regress/expected/cypher_call.out modified: regress/sql/cypher_call.sql --- regress/expected/cypher_call.out | 52 ++++++++++++++++++++++++++++++ regress/sql/cypher_call.sql | 25 ++++++++++++++ src/backend/parser/cypher_clause.c | 6 ++-- src/backend/parser/cypher_expr.c | 13 ++++---- 4 files changed, 86 insertions(+), 10 deletions(-) diff --git a/regress/expected/cypher_call.out b/regress/expected/cypher_call.out index 6980abe4b..bb1185b96 100644 --- a/regress/expected/cypher_call.out +++ b/regress/expected/cypher_call.out @@ -240,6 +240,58 @@ SELECT * FROM cypher('cypher_call', $$ CALL sqrt(64) YIELD sqrt CALL agtype_sum( ERROR: duplicate variable "sqrt" LINE 1: ...LL sqrt(64) YIELD sqrt CALL agtype_sum(2,2) YIELD agtype_sum... ^ +-- Fix CALL/YIELD issues +CREATE OR REPLACE FUNCTION myfunc(i agtype) +RETURNS agtype +LANGUAGE plpgsql +AS $$ +DECLARE + result agtype; +BEGIN + RETURN ag_catalog.age_sqrt(i); +END; +$$; +-- should have no errors +SELECT * FROM cypher('cypher_call', $$ CALL ag_catalog.age_sqrt(64) YIELD age_sqrt RETURN age_sqrt $$) as (sqrt agtype); + sqrt +------ + 8.0 +(1 row) + +SELECT * FROM cypher('cypher_call', $$ CALL myfunc(25) YIELD myfunc RETURN myfunc $$) as (result agtype); + result +-------- + 5.0 +(1 row) + +SELECT * FROM cypher('cypher_call', $$ CALL ag_catalog.myfunc(25) YIELD myfunc RETURN myfunc $$) as (result agtype); + result +-------- + 5.0 +(1 row) + +-- should error +SELECT * FROM cypher('cypher_call', $$ CALL myfunc() YIELD myfunc RETURN myfunc $$) as (result agtype); +ERROR: function myfunc does not exist +LINE 1: SELECT * FROM cypher('cypher_call', $$ CALL myfunc() YIELD m... + ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. +SELECT * FROM cypher('cypher_call', $$ CALL myfunz(25) YIELD myfunc RETURN myfunc $$) as (result agtype); +ERROR: function myfunz does not exist +LINE 1: SELECT * FROM cypher('cypher_call', $$ CALL myfunz(25) YIELD... + ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. +SELECT * FROM cypher('cypher_call', $$ CALL ag_catalog.myfunc() YIELD myfunc RETURN myfunc $$) as (result agtype); +ERROR: function ag_catalog.myfunc() does not exist +LINE 1: ...T * FROM cypher('cypher_call', $$ CALL ag_catalog.myfunc() Y... + ^ +HINT: No function matches the given name and argument types. You might need to add explicit type casts. +SELECT * FROM cypher('cypher_call', $$ CALL ag_catalog.myfunz(25) YIELD myfunc RETURN myfunc $$) as (result agtype); +ERROR: function ag_catalog.myfunz(agtype) does not exist +LINE 1: ...OM cypher('cypher_call', $$ CALL ag_catalog.myfunz(25) YIELD... + ^ +HINT: No function matches the given name and argument types. You might need to add explicit type casts. +DROP FUNCTION myfunc; DROP SCHEMA call_stmt_test CASCADE; NOTICE: drop cascades to function call_stmt_test.add_agtype(agtype,agtype) SELECT drop_graph('cypher_call', true); diff --git a/regress/sql/cypher_call.sql b/regress/sql/cypher_call.sql index 917276806..ee1468755 100644 --- a/regress/sql/cypher_call.sql +++ b/regress/sql/cypher_call.sql @@ -104,5 +104,30 @@ SELECT * FROM cypher('cypher_call', $$ CALL sqrt(64) YIELD sqrt AS sqrt1 CALL sq SELECT * FROM cypher('cypher_call', $$ CALL sqrt(64) YIELD sqrt AS sqrt1 CALL sqrt(81) YIELD sqrt AS sqrt1 RETURN sqrt1, sqrt1 $$) as (a agtype, b agtype); SELECT * FROM cypher('cypher_call', $$ CALL sqrt(64) YIELD sqrt CALL agtype_sum(2,2) YIELD agtype_sum AS sqrt RETURN sqrt, sqrt $$) as (a agtype, b agtype); +-- Fix CALL/YIELD issues +CREATE OR REPLACE FUNCTION myfunc(i agtype) +RETURNS agtype +LANGUAGE plpgsql +AS $$ +DECLARE + result agtype; +BEGIN + RETURN ag_catalog.age_sqrt(i); +END; +$$; + +-- should have no errors +SELECT * FROM cypher('cypher_call', $$ CALL ag_catalog.age_sqrt(64) YIELD age_sqrt RETURN age_sqrt $$) as (sqrt agtype); +SELECT * FROM cypher('cypher_call', $$ CALL myfunc(25) YIELD myfunc RETURN myfunc $$) as (result agtype); +SELECT * FROM cypher('cypher_call', $$ CALL ag_catalog.myfunc(25) YIELD myfunc RETURN myfunc $$) as (result agtype); + +-- should error +SELECT * FROM cypher('cypher_call', $$ CALL myfunc() YIELD myfunc RETURN myfunc $$) as (result agtype); +SELECT * FROM cypher('cypher_call', $$ CALL myfunz(25) YIELD myfunc RETURN myfunc $$) as (result agtype); +SELECT * FROM cypher('cypher_call', $$ CALL ag_catalog.myfunc() YIELD myfunc RETURN myfunc $$) as (result agtype); +SELECT * FROM cypher('cypher_call', $$ CALL ag_catalog.myfunz(25) YIELD myfunc RETURN myfunc $$) as (result agtype); + +DROP FUNCTION myfunc; + DROP SCHEMA call_stmt_test CASCADE; SELECT drop_graph('cypher_call', true); diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 4d03be09e..1b39595b3 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -1148,7 +1148,7 @@ static Query *transform_cypher_call_subquery(cypher_parsestate *cpstate, EXPR_KIND_FROM_FUNCTION)); /* retrieve the column name from funccall */ - colName = strVal(linitial(self->funccall->funcname)); + colName = strVal(llast(self->funccall->funcname)); /* make a targetentry from the funcexpr node */ tle = makeTargetEntry((Expr *) node, @@ -3952,7 +3952,7 @@ static List *transform_map_to_ind_recursive(cypher_parsestate *cpstate, * * Transforms the map to a list of equality irrespective of * value type. For example, - * + * * x.name = 'xyz' * x.map = {"city": "abc", "street": {"name": "pqr", "number": 123}} * x.list = [9, 8, 7] @@ -4006,7 +4006,7 @@ static List *transform_map_to_ind_top_level(cypher_parsestate *cpstate, qual = (Node *)make_op(pstate, op, lhs, rhs, last_srf, -1); quals = lappend(quals, qual); } - + return quals; } diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index 390bfb392..993957c83 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -2036,7 +2036,7 @@ static Node *transform_FuncCall(cypher_parsestate *cpstate, FuncCall *fn) targs = lcons(c, targs); } } - /* + /* * If it's not in age, check if it's a potential call to some function * in another installed extension. */ @@ -2055,14 +2055,13 @@ static Node *transform_FuncCall(cypher_parsestate *cpstate, FuncCall *fn) procform, extension); return retval; } + /* + * Else we have a function that is in the search_path, and not + * qualified, but is not in an extension. Pass it through. + */ else { - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_FUNCTION), - errmsg("function %s does not exist", name), - errhint("If the function is from an external extension, " - "make sure the extension is installed and the " - "function is in the search path."))); + fname = fn->funcname; } } /* no function found */ From 11c33164fdd9831f068c3120751dbfa182e8db12 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 24 Sep 2025 10:50:25 -0700 Subject: [PATCH 02/20] Bump gopkg.in/yaml.v3 from 3.0.0 to 3.0.1 in /drivers/golang (#2212) Bumps gopkg.in/yaml.v3 from 3.0.0 to 3.0.1. --- updated-dependencies: - dependency-name: gopkg.in/yaml.v3 dependency-version: 3.0.1 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- drivers/golang/go.mod | 2 +- drivers/golang/go.sum | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/golang/go.mod b/drivers/golang/go.mod index 95628d9f2..e212c4323 100644 --- a/drivers/golang/go.mod +++ b/drivers/golang/go.mod @@ -31,5 +31,5 @@ require ( github.com/davecgh/go-spew v1.1.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect golang.org/x/exp v0.0.0-20240205201215-2c58cdc269a3 // indirect - gopkg.in/yaml.v3 v3.0.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/drivers/golang/go.sum b/drivers/golang/go.sum index 26bb0f3b4..eba2fed1e 100644 --- a/drivers/golang/go.sum +++ b/drivers/golang/go.sum @@ -2,8 +2,6 @@ github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230321174746-8dcc6526cfb1 h github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230321174746-8dcc6526cfb1/go.mod h1:pSwJ0fSY5KhvocuWSx4fz3BA8OrA1bQn+K1Eli3BRwM= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/lib/pq v1.10.7 h1:p7ZhMD+KsSRozJr34udlUrhboJwWAgCg34+/ZZNvZZw= -github.com/lib/pq v1.10.7/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -11,14 +9,10 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -golang.org/x/exp v0.0.0-20230321023759-10a507213a29 h1:ooxPy7fPvB4kwsA2h+iBNHkAbp/4JxTSwCmvdjEYmug= -golang.org/x/exp v0.0.0-20230321023759-10a507213a29/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= -golang.org/x/exp v0.0.0-20240119083558-1b970713d09a h1:Q8/wZp0KX97QFTc2ywcOE0YRjZPVIx+MXInMzdvQqcA= -golang.org/x/exp v0.0.0-20240119083558-1b970713d09a/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08= golang.org/x/exp v0.0.0-20240205201215-2c58cdc269a3 h1:/RIbNt/Zr7rVhIkQhooTxCxFcdWLGIKnZA4IXNFSrvo= golang.org/x/exp v0.0.0-20240205201215-2c58cdc269a3/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0 h1:hjy8E9ON/egN1tAYqKb61G10WtihqetD4sz2H+8nIeA= -gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 4069cfa7cfa3a8a8035e44618d53743450013a83 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Fri, 3 Oct 2025 18:32:42 -0700 Subject: [PATCH 03/20] Add fast functions for checking edge uniqueness (#2227) Added fast functions for checking edge uniqueness. This will help improve performance for MATCH queries with paths longer than 3 but less than 11. The normal edge uniqueness function will deal with any path 11 and over. modified: age--1.6.0--y.y.y.sql modified: sql/agtype_graphid.sql modified: src/backend/parser/cypher_clause.c modified: src/backend/utils/adt/age_vle.c --- age--1.6.0--y.y.y.sql | 20 ++++++++++++ sql/agtype_graphid.sql | 21 +++++++++++++ src/backend/parser/cypher_clause.c | 33 +++++++++++++++++--- src/backend/utils/adt/age_vle.c | 49 ++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 5 deletions(-) diff --git a/age--1.6.0--y.y.y.sql b/age--1.6.0--y.y.y.sql index 50b03458d..8c58bb3eb 100644 --- a/age--1.6.0--y.y.y.sql +++ b/age--1.6.0--y.y.y.sql @@ -33,3 +33,23 @@ --* file. We need to keep the order of these changes. --* REMOVE ALL LINES ABOVE, and this one, that start with --* +CREATE FUNCTION ag_catalog._ag_enforce_edge_uniqueness2(graphid, graphid) + RETURNS bool + LANGUAGE c + STABLE +PARALLEL SAFE +as 'MODULE_PATHNAME'; + +CREATE FUNCTION ag_catalog._ag_enforce_edge_uniqueness3(graphid, graphid, graphid) + RETURNS bool + LANGUAGE c + STABLE +PARALLEL SAFE +as 'MODULE_PATHNAME'; + +CREATE FUNCTION ag_catalog._ag_enforce_edge_uniqueness4(graphid, graphid, graphid, graphid) + RETURNS bool + LANGUAGE c + STABLE +PARALLEL SAFE +as 'MODULE_PATHNAME'; diff --git a/sql/agtype_graphid.sql b/sql/agtype_graphid.sql index 4e05943ae..0887db8a9 100644 --- a/sql/agtype_graphid.sql +++ b/sql/agtype_graphid.sql @@ -77,6 +77,27 @@ CALLED ON NULL INPUT PARALLEL SAFE AS 'MODULE_PATHNAME'; +CREATE FUNCTION ag_catalog._ag_enforce_edge_uniqueness2(graphid, graphid) + RETURNS bool + LANGUAGE c + STABLE +PARALLEL SAFE +as 'MODULE_PATHNAME'; + +CREATE FUNCTION ag_catalog._ag_enforce_edge_uniqueness3(graphid, graphid, graphid) + RETURNS bool + LANGUAGE c + STABLE +PARALLEL SAFE +as 'MODULE_PATHNAME'; + +CREATE FUNCTION ag_catalog._ag_enforce_edge_uniqueness4(graphid, graphid, graphid, graphid) + RETURNS bool + LANGUAGE c + STABLE +PARALLEL SAFE +as 'MODULE_PATHNAME'; + CREATE FUNCTION ag_catalog._ag_enforce_edge_uniqueness(VARIADIC "any") RETURNS bool LANGUAGE c diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 1b39595b3..df96b3cdf 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -3271,13 +3271,13 @@ static FuncCall *prevent_duplicate_edges(cypher_parsestate *cpstate, { List *edges = NIL; ListCell *lc; - List *qualified_function_name; - String *ag_catalog, *edge_fn; + List *qualified_function_name = NULL; + String *ag_catalog; + String *edge_fn = NULL; + bool is_vle_edge = false; + int nentities = list_length(entities); ag_catalog = makeString("ag_catalog"); - edge_fn = makeString("_ag_enforce_edge_uniqueness"); - - qualified_function_name = list_make2(ag_catalog, edge_fn); /* iterate through each entity, collecting the access node for each edge */ foreach (lc, entities) @@ -3293,10 +3293,33 @@ static FuncCall *prevent_duplicate_edges(cypher_parsestate *cpstate, } else if (entity->type == ENT_VLE_EDGE) { + is_vle_edge = true; edges = lappend(edges, entity->expr); } } + if (!is_vle_edge && (nentities >= 5 && nentities <= 9)) + { + if (nentities == 5) + { + edge_fn = makeString("_ag_enforce_edge_uniqueness2"); + } + else if (nentities == 7) + { + edge_fn = makeString("_ag_enforce_edge_uniqueness3"); + } + else + { + edge_fn = makeString("_ag_enforce_edge_uniqueness4"); + } + } + else + { + edge_fn = makeString("_ag_enforce_edge_uniqueness"); + } + + qualified_function_name = list_make2(ag_catalog, edge_fn); + return makeFuncCall(qualified_function_name, edges, COERCE_SQL_SYNTAX, -1); } diff --git a/src/backend/utils/adt/age_vle.c b/src/backend/utils/adt/age_vle.c index f0adab2e9..f9e4c70b8 100644 --- a/src/backend/utils/adt/age_vle.c +++ b/src/backend/utils/adt/age_vle.c @@ -2427,6 +2427,55 @@ Datum age_build_vle_match_edge(PG_FUNCTION_ARGS) PG_RETURN_POINTER(agtype_value_to_agtype(result.res)); } +PG_FUNCTION_INFO_V1(_ag_enforce_edge_uniqueness2); + +Datum _ag_enforce_edge_uniqueness2(PG_FUNCTION_ARGS) +{ + graphid gid1 = AG_GETARG_GRAPHID(0); + graphid gid2 = AG_GETARG_GRAPHID(1); + + if (gid1 == gid2) + { + PG_RETURN_BOOL(false); + } + + PG_RETURN_BOOL(true); +} + +PG_FUNCTION_INFO_V1(_ag_enforce_edge_uniqueness3); + +Datum _ag_enforce_edge_uniqueness3(PG_FUNCTION_ARGS) +{ + graphid gid1 = AG_GETARG_GRAPHID(0); + graphid gid2 = AG_GETARG_GRAPHID(1); + graphid gid3 = AG_GETARG_GRAPHID(2); + + if (gid1 == gid2 || gid1 == gid3 || gid2 == gid3) + { + PG_RETURN_BOOL(false); + } + + PG_RETURN_BOOL(true); +} + +PG_FUNCTION_INFO_V1(_ag_enforce_edge_uniqueness4); + +Datum _ag_enforce_edge_uniqueness4(PG_FUNCTION_ARGS) +{ + graphid gid1 = AG_GETARG_GRAPHID(0); + graphid gid2 = AG_GETARG_GRAPHID(1); + graphid gid3 = AG_GETARG_GRAPHID(2); + graphid gid4 = AG_GETARG_GRAPHID(3); + + if (gid1 == gid2 || gid1 == gid3 || gid1 == gid4 || + gid2 == gid3 || gid2 == gid4 || gid3 == gid4) + { + PG_RETURN_BOOL(false); + } + + PG_RETURN_BOOL(true); +} + /* * This function checks the edges in a MATCH clause to see if they are unique or * not. Filters out all the paths where the edge uniques rules are not met. From 1c7bc56d6b318851397800ea66e95a3703236355 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Thu, 13 Nov 2025 07:57:41 -0800 Subject: [PATCH 04/20] Fix issue 2243 - Regression in string concatenation (#2244) Fixed issue 2243 - Regression in string concatenation using the + operator. The issue was in versions 1.5.0 and 1.6.0, at least. It was due to using Int8GetDatum instead of Int64GetDatum for the agtype integer field in the following functions - get_numeric_datum_from_agtype_value get_string_from_agtype_value This impacted more than what the original issue covered, but those additional cases were resolved too. Added regression tests. modified: regress/expected/agtype.out modified: regress/sql/agtype.sql modified: src/backend/utils/adt/agtype_ops.c --- regress/expected/agtype.out | 101 +++++++++++++++++++++++++++++ regress/sql/agtype.sql | 47 ++++++++++++++ src/backend/utils/adt/agtype_ops.c | 4 +- 3 files changed, 150 insertions(+), 2 deletions(-) diff --git a/regress/expected/agtype.out b/regress/expected/agtype.out index d4a577c00..065f357f1 100644 --- a/regress/expected/agtype.out +++ b/regress/expected/agtype.out @@ -3776,9 +3776,110 @@ SELECT agtype_build_map('1', '1', 2, 2, 3.14, 3.14, 'e', 2.71); {"1": "1", "2": 2, "e": 2.71::numeric, "3.14": 3.14::numeric} (1 row) +-- +-- Bug found from issue 2043 - Regression in string concatenation using the + operator +-- +-- This bug impacted specific numeric cases too. +-- +SELECT * FROM create_graph('issue_2243'); +NOTICE: graph "issue_2243" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('issue_2243', $$ + CREATE (n30164502:Node {data_id: 30164502}) + RETURN id(n30164502) + ':test_n' + n30164502.data_id + $$ ) as (result agtype); + result +---------------------------------- + "844424930131969:test_n30164502" +(1 row) + +-- concat / add +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::integer + ":test_n" + 9223372036854775807::integer + $$ ) as (result agtype); + result +------------------------------------------------- + "9223372036854775807:test_n9223372036854775807" +(1 row) + +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::numeric + 9223372036854775807::integer + $$ ) as (result agtype); + result +------------------------------- + 18446744073709551614::numeric +(1 row) + +-- sub +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::numeric - 9223372036854775807::integer + $$ ) as (result agtype); + result +------------ + 0::numeric +(1 row) + +-- mul +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::numeric * 9223372036854775807::integer + $$ ) as (result agtype); + result +------------------------------------------------- + 85070591730234615847396907784232501249::numeric +(1 row) + +-- div +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::numeric / 9223372036854775807::integer + $$ ) as (result agtype); + result +--------------------------------- + 1.00000000000000000000::numeric +(1 row) + +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::integer / 9223372036854775807::numeric + $$ ) as (result agtype); + result +--------------------------------- + 1.00000000000000000000::numeric +(1 row) + +-- mod +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::numeric % 9223372036854775807::integer + $$ ) as (result agtype); + result +------------ + 0::numeric +(1 row) + +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::integer % 9223372036854775807::numeric + $$ ) as (result agtype); + result +------------ + 0::numeric +(1 row) + -- -- Cleanup -- +SELECT drop_graph('issue_2243', true); +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table issue_2243._ag_label_vertex +drop cascades to table issue_2243._ag_label_edge +drop cascades to table issue_2243."Node" +NOTICE: graph "issue_2243" has been dropped + drop_graph +------------ + +(1 row) + SELECT drop_graph('agtype_build_map', true); NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to table agtype_build_map._ag_label_vertex diff --git a/regress/sql/agtype.sql b/regress/sql/agtype.sql index 016f457f2..6dab6bc30 100644 --- a/regress/sql/agtype.sql +++ b/regress/sql/agtype.sql @@ -1075,9 +1075,56 @@ SELECT * FROM cypher('agtype_build_map', $$ RETURN ag_catalog.agtype_build_map(' $$) AS (results agtype); SELECT agtype_build_map('1', '1', 2, 2, 3.14, 3.14, 'e', 2.71); +-- +-- Bug found from issue 2043 - Regression in string concatenation using the + operator +-- +-- This bug impacted specific numeric cases too. +-- +SELECT * FROM create_graph('issue_2243'); +SELECT * FROM cypher('issue_2243', $$ + CREATE (n30164502:Node {data_id: 30164502}) + RETURN id(n30164502) + ':test_n' + n30164502.data_id + $$ ) as (result agtype); + +-- concat / add +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::integer + ":test_n" + 9223372036854775807::integer + $$ ) as (result agtype); + +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::numeric + 9223372036854775807::integer + $$ ) as (result agtype); + +-- sub +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::numeric - 9223372036854775807::integer + $$ ) as (result agtype); + +-- mul +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::numeric * 9223372036854775807::integer + $$ ) as (result agtype); + +-- div +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::numeric / 9223372036854775807::integer + $$ ) as (result agtype); +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::integer / 9223372036854775807::numeric + $$ ) as (result agtype); + +-- mod +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::numeric % 9223372036854775807::integer + $$ ) as (result agtype); +SELECT * FROM cypher('issue_2243', $$ + RETURN 9223372036854775807::integer % 9223372036854775807::numeric + $$ ) as (result agtype); + -- -- Cleanup -- +SELECT drop_graph('issue_2243', true); SELECT drop_graph('agtype_build_map', true); DROP TABLE agtype_table; diff --git a/src/backend/utils/adt/agtype_ops.c b/src/backend/utils/adt/agtype_ops.c index c6c13aadb..d831447b0 100644 --- a/src/backend/utils/adt/agtype_ops.c +++ b/src/backend/utils/adt/agtype_ops.c @@ -68,7 +68,7 @@ static char *get_string_from_agtype_value(agtype_value *agtv, int *length) { case AGTV_INTEGER: number = DirectFunctionCall1(int8out, - Int8GetDatum(agtv->val.int_value)); + Int64GetDatum(agtv->val.int_value)); string = DatumGetCString(number); *length = strlen(string); return string; @@ -115,7 +115,7 @@ Datum get_numeric_datum_from_agtype_value(agtype_value *agtv) { case AGTV_INTEGER: return DirectFunctionCall1(int8_numeric, - Int8GetDatum(agtv->val.int_value)); + Int64GetDatum(agtv->val.int_value)); case AGTV_FLOAT: return DirectFunctionCall1(float8_numeric, Float8GetDatum(agtv->val.float_value)); From 9bb21cfb6578e8d0c290540a627dd6323efd48d0 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Wed, 19 Nov 2025 13:45:44 -0800 Subject: [PATCH 05/20] Fix issue 2245 - Creating more than 41 vlabels causes crash in drop_graph (#2248) Fixed issue 2245 - Creating more than 41 vlabels causes drop_grapth to fail with "label (relation) cache corrupted" and crashing out on the following command. This was due to corruption of the label_relation_cache during the HASH_DELETE process. As the issue was with a cache flush routine, it was necessary to fix them all. Here is the list of the flush functions that were fixed - static void flush_graph_name_cache(void) static void flush_graph_namespace_cache(void) static void flush_label_name_graph_cache(void) static void flush_label_graph_oid_cache(void) static void flush_label_relation_cache(void) static void flush_label_seq_name_graph_cache(void) Added regression tests. modified: regress/expected/catalog.out modified: regress/sql/catalog.sql modified: src/backend/utils/cache/ag_cache.c --- regress/expected/catalog.out | 141 ++++++++++++++++++++++- regress/sql/catalog.sql | 42 ++++++- src/backend/utils/cache/ag_cache.c | 174 ++++++++++------------------- 3 files changed, 241 insertions(+), 116 deletions(-) diff --git a/regress/expected/catalog.out b/regress/expected/catalog.out index d06a0ce67..a15fa4698 100644 --- a/regress/expected/catalog.out +++ b/regress/expected/catalog.out @@ -457,7 +457,146 @@ NOTICE: graph does not exist (1 row) DROP FUNCTION raise_notice(TEXT); --- dropping the graph +-- +-- Fix issue 2245 - Creating more than 41 vlabels causes drop_graph to fail with +-- label (relation) cache corrupted +-- +-- this result will change if another graph was created prior to this point. +SELECT count(*) FROM ag_label; + count +------- + 2 +(1 row) + +SELECT * FROM create_graph('issue_2245'); +NOTICE: graph "issue_2245" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('issue_2245', $$ + CREATE (a1:Part1 {part_num: '123'}), (a2:Part2 {part_num: '345'}), (a3:Part3 {part_num: '456'}), + (a4:Part4 {part_num: '789'}), (a5:Part5 {part_num: '123'}), (a6:Part6 {part_num: '345'}), + (a7:Part7 {part_num: '456'}), (a8:Part8 {part_num: '789'}), (a9:Part9 {part_num: '123'}), + (a10:Part10 {part_num: '345'}), (a11:Part11 {part_num: '456'}), (a12:Part12 {part_num: '789'}), + (a13:Part13 {part_num: '123'}), (a14:Part14 {part_num: '345'}), (a15:Part15 {part_num: '456'}), + (a16:Part16 {part_num: '789'}), (a17:Part17 {part_num: '123'}), (a18:Part18 {part_num: '345'}), + (a19:Part19 {part_num: '456'}), (a20:Part20 {part_num: '789'}), (a21:Part21 {part_num: '123'}), + (a22:Part22 {part_num: '345'}), (a23:Part23 {part_num: '456'}), (a24:Part24 {part_num: '789'}), + (a25:Part25 {part_num: '123'}), (a26:Part26 {part_num: '345'}), (a27:Part27 {part_num: '456'}), + (a28:Part28 {part_num: '789'}), (a29:Part29 {part_num: '789'}), (a30:Part30 {part_num: '123'}), + (a31:Part31 {part_num: '345'}), (a32:Part32 {part_num: '456'}), (a33:Part33 {part_num: '789'}), + (a34:Part34 {part_num: '123'}), (a35:Part35 {part_num: '345'}), (a36:Part36 {part_num: '456'}), + (a37:Part37 {part_num: '789'}), (a38:Part38 {part_num: '123'}), (a39:Part39 {part_num: '345'}), + (a40:Part40 {part_num: '456'}), (a41:Part41 {part_num: '789'}), (a42:Part42 {part_num: '345'}), + (a43:Part43 {part_num: '456'}), (a44:Part44 {part_num: '789'}), (a45:Part45 {part_num: '456'}), + (a46:Part46 {part_num: '789'}), (a47:Part47 {part_num: '456'}), (a48:Part48 {part_num: '789'}), + (a49:Part49 {part_num: '789'}), (a50:Part50 {part_num: '456'}), (a51:Part51 {part_num: '789'}) + $$) AS (result agtype); + result +-------- +(0 rows) + +SELECT count(*) FROM ag_label; + count +------- + 55 +(1 row) + +SELECT drop_graph('issue_2245', true); +NOTICE: drop cascades to 53 other objects +DETAIL: drop cascades to table issue_2245._ag_label_vertex +drop cascades to table issue_2245._ag_label_edge +drop cascades to table issue_2245."Part1" +drop cascades to table issue_2245."Part2" +drop cascades to table issue_2245."Part3" +drop cascades to table issue_2245."Part4" +drop cascades to table issue_2245."Part5" +drop cascades to table issue_2245."Part6" +drop cascades to table issue_2245."Part7" +drop cascades to table issue_2245."Part8" +drop cascades to table issue_2245."Part9" +drop cascades to table issue_2245."Part10" +drop cascades to table issue_2245."Part11" +drop cascades to table issue_2245."Part12" +drop cascades to table issue_2245."Part13" +drop cascades to table issue_2245."Part14" +drop cascades to table issue_2245."Part15" +drop cascades to table issue_2245."Part16" +drop cascades to table issue_2245."Part17" +drop cascades to table issue_2245."Part18" +drop cascades to table issue_2245."Part19" +drop cascades to table issue_2245."Part20" +drop cascades to table issue_2245."Part21" +drop cascades to table issue_2245."Part22" +drop cascades to table issue_2245."Part23" +drop cascades to table issue_2245."Part24" +drop cascades to table issue_2245."Part25" +drop cascades to table issue_2245."Part26" +drop cascades to table issue_2245."Part27" +drop cascades to table issue_2245."Part28" +drop cascades to table issue_2245."Part29" +drop cascades to table issue_2245."Part30" +drop cascades to table issue_2245."Part31" +drop cascades to table issue_2245."Part32" +drop cascades to table issue_2245."Part33" +drop cascades to table issue_2245."Part34" +drop cascades to table issue_2245."Part35" +drop cascades to table issue_2245."Part36" +drop cascades to table issue_2245."Part37" +drop cascades to table issue_2245."Part38" +drop cascades to table issue_2245."Part39" +drop cascades to table issue_2245."Part40" +drop cascades to table issue_2245."Part41" +drop cascades to table issue_2245."Part42" +drop cascades to table issue_2245."Part43" +drop cascades to table issue_2245."Part44" +drop cascades to table issue_2245."Part45" +drop cascades to table issue_2245."Part46" +drop cascades to table issue_2245."Part47" +drop cascades to table issue_2245."Part48" +drop cascades to table issue_2245."Part49" +drop cascades to table issue_2245."Part50" +drop cascades to table issue_2245."Part51" +NOTICE: graph "issue_2245" has been dropped + drop_graph +------------ + +(1 row) + +-- this result should be the same as the one before the create_graph +SELECT count(*) FROM ag_label; + count +------- + 2 +(1 row) + +-- create the graph again +SELECT * FROM create_graph('issue_2245'); +NOTICE: graph "issue_2245" has been created + create_graph +-------------- + +(1 row) + +SELECT count(*) FROM ag_label; + count +------- + 4 +(1 row) + +-- dropping the graphs +SELECT drop_graph('issue_2245', true); +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table issue_2245._ag_label_vertex +drop cascades to table issue_2245._ag_label_edge +NOTICE: graph "issue_2245" has been dropped + drop_graph +------------ + +(1 row) + SELECT drop_graph('graph', true); NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to table graph._ag_label_vertex diff --git a/regress/sql/catalog.sql b/regress/sql/catalog.sql index 85fc4e8ab..bb72c3495 100644 --- a/regress/sql/catalog.sql +++ b/regress/sql/catalog.sql @@ -193,5 +193,45 @@ SELECT raise_notice('graph1'); DROP FUNCTION raise_notice(TEXT); --- dropping the graph +-- +-- Fix issue 2245 - Creating more than 41 vlabels causes drop_graph to fail with +-- label (relation) cache corrupted +-- + +-- this result will change if another graph was created prior to this point. +SELECT count(*) FROM ag_label; + +SELECT * FROM create_graph('issue_2245'); +SELECT * FROM cypher('issue_2245', $$ + CREATE (a1:Part1 {part_num: '123'}), (a2:Part2 {part_num: '345'}), (a3:Part3 {part_num: '456'}), + (a4:Part4 {part_num: '789'}), (a5:Part5 {part_num: '123'}), (a6:Part6 {part_num: '345'}), + (a7:Part7 {part_num: '456'}), (a8:Part8 {part_num: '789'}), (a9:Part9 {part_num: '123'}), + (a10:Part10 {part_num: '345'}), (a11:Part11 {part_num: '456'}), (a12:Part12 {part_num: '789'}), + (a13:Part13 {part_num: '123'}), (a14:Part14 {part_num: '345'}), (a15:Part15 {part_num: '456'}), + (a16:Part16 {part_num: '789'}), (a17:Part17 {part_num: '123'}), (a18:Part18 {part_num: '345'}), + (a19:Part19 {part_num: '456'}), (a20:Part20 {part_num: '789'}), (a21:Part21 {part_num: '123'}), + (a22:Part22 {part_num: '345'}), (a23:Part23 {part_num: '456'}), (a24:Part24 {part_num: '789'}), + (a25:Part25 {part_num: '123'}), (a26:Part26 {part_num: '345'}), (a27:Part27 {part_num: '456'}), + (a28:Part28 {part_num: '789'}), (a29:Part29 {part_num: '789'}), (a30:Part30 {part_num: '123'}), + (a31:Part31 {part_num: '345'}), (a32:Part32 {part_num: '456'}), (a33:Part33 {part_num: '789'}), + (a34:Part34 {part_num: '123'}), (a35:Part35 {part_num: '345'}), (a36:Part36 {part_num: '456'}), + (a37:Part37 {part_num: '789'}), (a38:Part38 {part_num: '123'}), (a39:Part39 {part_num: '345'}), + (a40:Part40 {part_num: '456'}), (a41:Part41 {part_num: '789'}), (a42:Part42 {part_num: '345'}), + (a43:Part43 {part_num: '456'}), (a44:Part44 {part_num: '789'}), (a45:Part45 {part_num: '456'}), + (a46:Part46 {part_num: '789'}), (a47:Part47 {part_num: '456'}), (a48:Part48 {part_num: '789'}), + (a49:Part49 {part_num: '789'}), (a50:Part50 {part_num: '456'}), (a51:Part51 {part_num: '789'}) + $$) AS (result agtype); + +SELECT count(*) FROM ag_label; +SELECT drop_graph('issue_2245', true); + +-- this result should be the same as the one before the create_graph +SELECT count(*) FROM ag_label; + +-- create the graph again +SELECT * FROM create_graph('issue_2245'); +SELECT count(*) FROM ag_label; + +-- dropping the graphs +SELECT drop_graph('issue_2245', true); SELECT drop_graph('graph', true); diff --git a/src/backend/utils/cache/ag_cache.c b/src/backend/utils/cache/ag_cache.c index e3c4d0794..493ffcfa9 100644 --- a/src/backend/utils/cache/ag_cache.c +++ b/src/backend/utils/cache/ag_cache.c @@ -286,52 +286,34 @@ static void invalidate_graph_caches(Datum arg, int cache_id, uint32 hash_value) static void flush_graph_name_cache(void) { - HASH_SEQ_STATUS hash_seq; - - hash_seq_init(&hash_seq, graph_name_cache_hash); - for (;;) + /* + * If the graph_name_cache exists, destroy it. This will avoid any + * potential corruption issues. + */ + if (graph_name_cache_hash) { - graph_name_cache_entry *entry; - void *removed; - - entry = hash_seq_search(&hash_seq); - if (!entry) - { - break; - } - removed = hash_search(graph_name_cache_hash, &entry->name, HASH_REMOVE, - NULL); - if (!removed) - { - ereport(ERROR, (errmsg_internal("graph (name) cache corrupted"))); - } + hash_destroy(graph_name_cache_hash); + graph_name_cache_hash = NULL; } + + /* recreate the graph_name_cache */ + create_graph_name_cache(); } static void flush_graph_namespace_cache(void) { - HASH_SEQ_STATUS hash_seq; - - hash_seq_init(&hash_seq, graph_namespace_cache_hash); - for (;;) + /* + * If the graph_namespace_cache exists, destroy it. This will avoid any + * potential corruption issues. + */ + if (graph_namespace_cache_hash) { - graph_namespace_cache_entry *entry; - void *removed; - - entry = hash_seq_search(&hash_seq); - if (!entry) - { - break; - } - - removed = hash_search(graph_namespace_cache_hash, &entry->namespace, - HASH_REMOVE, NULL); - if (!removed) - { - ereport(ERROR, - (errmsg_internal("graph (namespace) cache corrupted"))); - } + hash_destroy(graph_namespace_cache_hash); + graph_namespace_cache_hash = NULL; } + + /* recreate the graph_namespace_cache */ + create_graph_namespace_cache(); } graph_cache_data *search_graph_name_cache(const char *name) @@ -664,27 +646,18 @@ static void invalidate_label_name_graph_cache(Oid relid) static void flush_label_name_graph_cache(void) { - HASH_SEQ_STATUS hash_seq; - - hash_seq_init(&hash_seq, label_name_graph_cache_hash); - for (;;) + /* + * If the label_name_graph_cache exists, destroy it. This will avoid any + * potential corruption issues. + */ + if (label_name_graph_cache_hash) { - label_name_graph_cache_entry *entry; - void *removed; - - entry = hash_seq_search(&hash_seq); - if (!entry) - { - break; - } - removed = hash_search(label_name_graph_cache_hash, &entry->key, - HASH_REMOVE, NULL); - if (!removed) - { - ereport(ERROR, - (errmsg_internal("label (name, graph) cache corrupted"))); - } + hash_destroy(label_name_graph_cache_hash); + label_name_graph_cache_hash = NULL; } + + /* recreate the label_name_graph_cache */ + create_label_name_graph_cache(); } static void invalidate_label_graph_oid_cache(Oid relid) @@ -722,27 +695,18 @@ static void invalidate_label_graph_oid_cache(Oid relid) static void flush_label_graph_oid_cache(void) { - HASH_SEQ_STATUS hash_seq; - - hash_seq_init(&hash_seq, label_graph_oid_cache_hash); - for (;;) + /* + * If the label_graph_oid_cache exists, destroy it. This will avoid any + * potential corruption issues. + */ + if (label_graph_oid_cache_hash) { - label_graph_oid_cache_entry *entry; - void *removed; - - entry = hash_seq_search(&hash_seq); - if (!entry) - { - break; - } - removed = hash_search(label_graph_oid_cache_hash, &entry->key, - HASH_REMOVE, NULL); - if (!removed) - { - ereport(ERROR, - (errmsg_internal("label (graph, id) cache corrupted"))); - } + hash_destroy(label_graph_oid_cache_hash); + label_graph_oid_cache_hash = NULL; } + + /* recreate the label_graph_oid_cache */ + create_label_graph_oid_cache(); } static void invalidate_label_relation_cache(Oid relid) @@ -765,27 +729,18 @@ static void invalidate_label_relation_cache(Oid relid) static void flush_label_relation_cache(void) { - HASH_SEQ_STATUS hash_seq; - - hash_seq_init(&hash_seq, label_relation_cache_hash); - for (;;) + /* + * If the label_relation_cache exists, destroy it. This will avoid any + * potential corruption issues. + */ + if (label_relation_cache_hash) { - label_relation_cache_entry *entry; - void *removed; - - entry = hash_seq_search(&hash_seq); - if (!entry) - { - break; - } - removed = hash_search(label_relation_cache_hash, &entry->relation, - HASH_REMOVE, NULL); - if (!removed) - { - ereport(ERROR, - (errmsg_internal("label (relation) cache corrupted"))); - } + hash_destroy(label_relation_cache_hash); + label_relation_cache_hash = NULL; } + + /* recreate the label_relation_cache */ + create_label_relation_cache(); } static void invalidate_label_seq_name_graph_cache(Oid relid) @@ -823,27 +778,18 @@ static void invalidate_label_seq_name_graph_cache(Oid relid) static void flush_label_seq_name_graph_cache(void) { - HASH_SEQ_STATUS hash_seq; - - hash_seq_init(&hash_seq, label_seq_name_graph_cache_hash); - for (;;) + /* + * If the label_seq_name_graph_cache exists, destroy it. This will + * avoid any potential corruption issues by deleting entries. + */ + if (label_seq_name_graph_cache_hash) { - label_seq_name_graph_cache_entry *entry; - void *removed; - - entry = hash_seq_search(&hash_seq); - if (!entry) - { - break; - } - removed = hash_search(label_seq_name_graph_cache_hash, &entry->key, - HASH_REMOVE, NULL); - if (!removed) - { - ereport(ERROR, - (errmsg_internal("label (seq_name, graph) cache corrupted"))); - } + hash_destroy(label_seq_name_graph_cache_hash); + label_seq_name_graph_cache_hash = NULL; } + + /* recreate the label_seq_name_graph_cache */ + create_label_seq_name_graph_cache(); } label_cache_data *search_label_name_graph_cache(const char *name, Oid graph) From aa618311c5a7c9e9cade2918e4ab68b4c011c48f Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Tue, 9 Dec 2025 10:51:01 -0800 Subject: [PATCH 06/20] Fix Issue 2256: segmentation fault when calling coalesce function (#2259) Fixed issue 2256: A segmentation fault occurs when calling the coalesce function in PostgreSQL version 17. This likely predates 17 and includes other similar types of "functions". And other versions of PostgreSQL. See issues 1124 (PR 1125) and 1303 (PR 1317) for more details. This issue is due to coalesce() being processed differently from other functions. Additionally, greatest() was found to exhibit the same behavior. They were added to the list of types to ignore during the cypher analyze phase. A few others were added: CaseExpr, XmlExpr, ArrayExpr, & RowExpr. Although, I wasn't able to find cases where these caused crashes. Added regression tests. modified: regress/expected/cypher.out modified: regress/sql/cypher.sql modified: src/backend/parser/cypher_analyze.c Conflicts: src/backend/parser/cypher_analyze.c --- regress/expected/cypher.out | 16 ++++++++++++++++ regress/sql/cypher.sql | 7 +++++++ src/backend/parser/cypher_analyze.c | 26 ++++++++++++++++++++------ 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/regress/expected/cypher.out b/regress/expected/cypher.out index 31bafc6cf..53ea9f0c1 100644 --- a/regress/expected/cypher.out +++ b/regress/expected/cypher.out @@ -169,6 +169,22 @@ CREATE TABLE my_edges AS -- create a table of 4 columns, u, e, v, p. should be 5 rows CREATE TABLE my_detailed_paths AS (SELECT * FROM cypher('issue_1767', $$ MATCH p=(u)-[e]->(v) RETURN u,e,v,p $$) as (u agtype, e agtype, v agtype, p agtype)); +-- +-- Issue 2256: A segmentation fault occurs when calling the coalesce function +-- This also occurs with the greatest function too. +-- +SELECT * FROM coalesce(1, 0); + coalesce +---------- + 1 +(1 row) + +SELECT * FROM greatest(1, 0); + greatest +---------- + 1 +(1 row) + -- dump out the tables SELECT * FROM my_vertices; u diff --git a/regress/sql/cypher.sql b/regress/sql/cypher.sql index 7ded61ee6..090c7e704 100644 --- a/regress/sql/cypher.sql +++ b/regress/sql/cypher.sql @@ -94,6 +94,13 @@ CREATE TABLE my_edges AS CREATE TABLE my_detailed_paths AS (SELECT * FROM cypher('issue_1767', $$ MATCH p=(u)-[e]->(v) RETURN u,e,v,p $$) as (u agtype, e agtype, v agtype, p agtype)); +-- +-- Issue 2256: A segmentation fault occurs when calling the coalesce function +-- This also occurs with the greatest function too. +-- +SELECT * FROM coalesce(1, 0); +SELECT * FROM greatest(1, 0); + -- dump out the tables SELECT * FROM my_vertices; SELECT * FROM my_edges; diff --git a/src/backend/parser/cypher_analyze.c b/src/backend/parser/cypher_analyze.c index d53fba3ef..ba703fb00 100644 --- a/src/backend/parser/cypher_analyze.c +++ b/src/backend/parser/cypher_analyze.c @@ -169,13 +169,20 @@ static bool convert_cypher_walker(Node *node, ParseState *pstate) * Const - constant value or expression node * BoolExpr - expression node for the basic Boolean operators AND, OR, NOT * - * These are a special case that needs to be ignored. + * Added the following, although only the first 2 caused crashes in tests - + * CoalesceExpr, MinMaxExpr, CaseExpr, XmlExpr, ArrayExpr, RowExpr + * + * These are all special case that needs to be ignored. * */ if (IsA(funcexpr, SQLValueFunction) - || IsA(funcexpr, CoerceViaIO) - || IsA(funcexpr, Var) || IsA(funcexpr, OpExpr) - || IsA(funcexpr, Const) || IsA(funcexpr, BoolExpr)) + || IsA(funcexpr, CoerceViaIO) + || IsA(funcexpr, Var) || IsA(funcexpr, OpExpr) + || IsA(funcexpr, Const) || IsA(funcexpr, BoolExpr) + || IsA(funcexpr, JsonConstructorExpr) + || IsA(funcexpr, CoalesceExpr) || IsA(funcexpr, MinMaxExpr) + || IsA(funcexpr, CaseExpr) || IsA(funcexpr, XmlExpr) + || IsA(funcexpr, ArrayExpr) || IsA(funcexpr, RowExpr)) { return false; } @@ -341,13 +348,20 @@ static bool is_func_cypher(FuncExpr *funcexpr) * Const - constant value or expression node * BoolExpr - expression node for the basic Boolean operators AND, OR, NOT * - * These are a special case that needs to be ignored. + * Added the following, although only the first 2 caused crashes in tests - + * CoalesceExpr, MinMaxExpr, CaseExpr, XmlExpr, ArrayExpr, RowExpr + * + * These are all special case that needs to be ignored. * */ if (IsA(funcexpr, SQLValueFunction) || IsA(funcexpr, CoerceViaIO) || IsA(funcexpr, Var) || IsA(funcexpr, OpExpr) - || IsA(funcexpr, Const) || IsA(funcexpr, BoolExpr)) + || IsA(funcexpr, Const) || IsA(funcexpr, BoolExpr) + || IsA(funcexpr, JsonConstructorExpr) + || IsA(funcexpr, CoalesceExpr) || IsA(funcexpr, MinMaxExpr) + || IsA(funcexpr, CaseExpr) || IsA(funcexpr, XmlExpr) + || IsA(funcexpr, ArrayExpr) || IsA(funcexpr, RowExpr)) { return false; } From 839c1f8a98342a45c21beb05c6a79af8303d3584 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Tue, 9 Dec 2025 11:15:26 -0800 Subject: [PATCH 07/20] Adjust 'could not find rte for' ERROR message (#2266) Adjusted the following type of error message. It was mentioned in issue 2263 as being incorrect, which it isn't. However, it did need some clarification added - ERROR: could not find rte for Added a HINT for additional clarity - HINT: variable does not exist within scope of usage For example: CREATE p0=(n0), (n1{k:EXISTS{WITH p0}}) RETURN 1 ERROR: could not find rte for p0 LINE 3: CREATE p0=(n0), (n1{k:EXISTS{WITH p0}}) ^ HINT: variable p0 does not exist within scope of usage Additionally, added pstate->p_expr_kind == EXPR_KIND_INSERT_TARGET to transform_cypher_clause_as_subquery. Updated existing regression tests. Added regression tests from issue. modified: regress/expected/cypher_call.out modified: regress/expected/cypher_subquery.out modified: regress/expected/cypher_union.out modified: regress/expected/cypher_with.out modified: regress/expected/expr.out modified: regress/expected/list_comprehension.out modified: regress/expected/scan.out modified: src/backend/parser/cypher_clause.c modified: src/backend/parser/cypher_expr.c Resolved Conflicts: regress/expected/cypher_subquery.out --- regress/expected/cypher_call.out | 2 + regress/expected/cypher_subquery.out | 4 ++ regress/expected/cypher_union.out | 1 + regress/expected/cypher_with.out | 4 ++ regress/expected/expr.out | 66 +++++++++++++++++++++++++ regress/expected/list_comprehension.out | 2 + regress/expected/scan.out | 6 +++ regress/sql/expr.sql | 27 ++++++++++ src/backend/parser/cypher_clause.c | 1 + src/backend/parser/cypher_expr.c | 8 +-- 10 files changed, 118 insertions(+), 3 deletions(-) diff --git a/regress/expected/cypher_call.out b/regress/expected/cypher_call.out index bb1185b96..08f97ba41 100644 --- a/regress/expected/cypher_call.out +++ b/regress/expected/cypher_call.out @@ -125,6 +125,7 @@ SELECT * FROM cypher('cypher_call', $$CALL sqrt(64) YIELD sqrt WHERE a = 8 RETUR ERROR: could not find rte for a LINE 2: ...r('cypher_call', $$CALL sqrt(64) YIELD sqrt WHERE a = 8 RETU... ^ +HINT: variable a does not exist within scope of usage /* MATCH CALL RETURN, should fail */ SELECT * FROM cypher('cypher_call', $$ MATCH (a) CALL sqrt(64) RETURN sqrt $$) as (sqrt agtype); ERROR: Procedure call inside a query does not support naming results implicitly @@ -171,6 +172,7 @@ SELECT * FROM cypher('cypher_call', $$ MATCH (a) CALL sqrt(64) YIELD sqrt WHERE ERROR: could not find rte for b LINE 1: ...all', $$ MATCH (a) CALL sqrt(64) YIELD sqrt WHERE b = 8 RETU... ^ +HINT: variable b does not exist within scope of usage /* CALL MATCH YIELD WHERE UPDATE/RETURN */ SELECT * FROM cypher('cypher_call', $$ CALL sqrt(64) YIELD sqrt WHERE sqrt > 1 CREATE ({n:'c'}) $$) as (a agtype); a diff --git a/regress/expected/cypher_subquery.out b/regress/expected/cypher_subquery.out index 559f0c67b..9c8b66b19 100644 --- a/regress/expected/cypher_subquery.out +++ b/regress/expected/cypher_subquery.out @@ -134,6 +134,7 @@ SELECT * FROM cypher('subquery', $$ MATCH (a:person) ERROR: could not find rte for c LINE 5: RETURN c ^ +HINT: variable c does not exist within scope of usage --union, no returns, not yet implemented, should error out SELECT * FROM cypher('subquery', $$ MATCH (a:person) WHERE EXISTS { @@ -317,6 +318,7 @@ SELECT * FROM cypher('subquery', $$ RETURN 1, ERROR: could not find rte for a LINE 4: RETURN a ^ +HINT: variable a does not exist within scope of usage --- COUNT --count pattern subquery in where SELECT * FROM cypher('subquery', $$ MATCH (a:person) @@ -507,6 +509,7 @@ SELECT * FROM cypher('subquery', $$ MATCH (a:person) ERROR: could not find rte for b LINE 2: RETURN a.name, COUNT{MATCH (a) RETURN b} $$) ^ +HINT: variable b does not exist within scope of usage --incorrect nested variable reference SELECT * FROM cypher('subquery', $$ MATCH (a:person) RETURN a.name, COUNT{MATCH (a) @@ -516,6 +519,7 @@ SELECT * FROM cypher('subquery', $$ MATCH (a:person) ERROR: could not find rte for b LINE 4: RETURN b} $$) ^ +HINT: variable b does not exist within scope of usage --count nested with exists SELECT * FROM cypher('subquery', $$ MATCH (a:person) RETURN a.name, diff --git a/regress/expected/cypher_union.out b/regress/expected/cypher_union.out index 063354ddb..14fa56e67 100644 --- a/regress/expected/cypher_union.out +++ b/regress/expected/cypher_union.out @@ -141,6 +141,7 @@ SELECT * FROM cypher('cypher_union', $$MATCH (n) RETURN n UNION ALL MATCH (m) RE ERROR: could not find rte for n LINE 2: ..., $$MATCH (n) RETURN n UNION ALL MATCH (m) RETURN n$$) AS (r... ^ +HINT: variable n does not exist within scope of usage /* *UNION and UNION ALL, type casting */ diff --git a/regress/expected/cypher_with.out b/regress/expected/cypher_with.out index e5f82aa21..99ea320a0 100644 --- a/regress/expected/cypher_with.out +++ b/regress/expected/cypher_with.out @@ -267,6 +267,7 @@ $$) AS (a agtype, b agtype); ERROR: could not find rte for b LINE 4: RETURN m,b ^ +HINT: variable b does not exist within scope of usage SELECT * FROM cypher('cypher_with', $$ MATCH (m)-[]->(b) WITH m AS start_node,b AS end_node @@ -278,6 +279,7 @@ $$) AS (id agtype, node agtype); ERROR: could not find rte for end_node LINE 7: RETURN id(start_node),end_node.name ^ +HINT: variable end_node does not exist within scope of usage -- Clean up SELECT drop_graph('cypher_with', true); NOTICE: drop cascades to 4 other objects @@ -320,6 +322,7 @@ $$) AS (n agtype, d agtype); ERROR: could not find rte for d LINE 8: RETURN c,d ^ +HINT: variable d does not exist within scope of usage -- Issue 396 (should error out) SELECT * FROM cypher('graph',$$ CREATE (v),(u),(w), @@ -338,6 +341,7 @@ $$) as (a agtype,b agtype); ERROR: could not find rte for v LINE 4: RETURN v,path_length ^ +HINT: variable v does not exist within scope of usage -- Clean up SELECT drop_graph('graph', true); NOTICE: drop cascades to 6 other objects diff --git a/regress/expected/expr.out b/regress/expected/expr.out index 9b5dd0a16..523561fa5 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -3370,6 +3370,7 @@ $$) AS (toBooleanList agtype); ERROR: could not find rte for fail LINE 2: RETURN toBooleanList(fail) ^ +HINT: variable fail does not exist within scope of usage SELECT * FROM cypher('expr', $$ RETURN toBooleanList("fail") $$) AS (toBooleanList agtype); @@ -3513,6 +3514,7 @@ $$) AS (toFloatList agtype); ERROR: could not find rte for failed LINE 2: RETURN toFloatList([failed]) ^ +HINT: variable failed does not exist within scope of usage SELECT * FROM cypher('expr', $$ RETURN toFloatList("failed") $$) AS (toFloatList agtype); @@ -3892,12 +3894,14 @@ $$) AS (toStringList agtype); ERROR: could not find rte for b LINE 2: RETURN toStringList([['a', b]]) ^ +HINT: variable b does not exist within scope of usage SELECT * FROM cypher('expr', $$ RETURN toStringList([test]) $$) AS (toStringList agtype); ERROR: could not find rte for test LINE 2: RETURN toStringList([test]) ^ +HINT: variable test does not exist within scope of usage -- -- reverse(string) -- @@ -7923,6 +7927,7 @@ SELECT * FROM cypher('list', $$ RETURN tail(abc) $$) AS (tail agtype); ERROR: could not find rte for abc LINE 1: SELECT * FROM cypher('list', $$ RETURN tail(abc) $$) AS (tai... ^ +HINT: variable abc does not exist within scope of usage SELECT * FROM cypher('list', $$ RETURN tail() $$) AS (tail agtype); ERROR: function ag_catalog.age_tail() does not exist LINE 1: SELECT * FROM cypher('list', $$ RETURN tail() $$) AS (tail a... @@ -9011,9 +9016,70 @@ SELECT agtype_hash_cmp(agtype_in('[null, null, null, null, null]')); -505290721 (1 row) +-- +-- Issue 2263: AGE returns incorrect error message for EXISTS subquery outer variable reference +-- +-- NOTE: There isn't really anything incorrect about the message. However, +-- it could be more clear. +-- +SELECT * FROM create_graph('issue_2263'); +NOTICE: graph "issue_2263" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('issue_2263', $$ + CREATE a=()-[:T]->(), p=({k:exists{return a}})-[:T]->() + RETURN 1 +$$) AS (one agtype); +ERROR: could not find rte for a +LINE 2: CREATE a=()-[:T]->(), p=({k:exists{return a}})-[:T]->() + ^ +HINT: variable a does not exist within scope of usage +SELECT * FROM cypher('issue_2263', $$ + CREATE p0=(n0), (n1{k:EXISTS{WITH p0}}) + RETURN 1 +$$) AS (one agtype); +ERROR: could not find rte for p0 +LINE 2: CREATE p0=(n0), (n1{k:EXISTS{WITH p0}}) + ^ +HINT: variable p0 does not exist within scope of usage +SELECT * FROM cypher('issue_2263', $$ + CREATE ()-[r4 :T6]->(), ({k2:COUNT{WITH r4.k AS a3 UNWIND [] AS a4 WITH DISTINCT NULL AS a5}}) + RETURN 1 +$$) AS (one agtype); +ERROR: could not find rte for r4 +LINE 2: CREATE ()-[r4 :T6]->(), ({k2:COUNT{WITH r4.k AS a3 UNWIN... + ^ +HINT: variable r4 does not exist within scope of usage +SELECT * FROM cypher('issue_2263', $$ + CREATE (x), ({a1:EXISTS { RETURN COUNT(0) AS a2, keys(x) AS a4 }}) +$$) AS (out agtype); +ERROR: could not find rte for x +LINE 2: ...TE (x), ({a1:EXISTS { RETURN COUNT(0) AS a2, keys(x) AS a4 }... + ^ +HINT: variable x does not exist within scope of usage +SELECT * FROM cypher('issue_2263', $$ + CREATE x = (), ({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL) END RETURN 0 } }) +$$) AS (out agtype); +ERROR: could not find rte for x +LINE 2: ...({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL)... + ^ +HINT: variable x does not exist within scope of usage -- -- Cleanup -- +SELECT * FROM drop_graph('issue_2263', true); +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table issue_2263._ag_label_vertex +drop cascades to table issue_2263._ag_label_edge +NOTICE: graph "issue_2263" has been dropped + drop_graph +------------ + +(1 row) + SELECT * FROM drop_graph('issue_1988', true); NOTICE: drop cascades to 4 other objects DETAIL: drop cascades to table issue_1988._ag_label_vertex diff --git a/regress/expected/list_comprehension.out b/regress/expected/list_comprehension.out index 07f777707..5a3756422 100644 --- a/regress/expected/list_comprehension.out +++ b/regress/expected/list_comprehension.out @@ -569,10 +569,12 @@ SELECT * FROM cypher('list_comprehension', $$ RETURN [i IN range(0, 10, 2)],i $$ ERROR: could not find rte for i LINE 1: ..._comprehension', $$ RETURN [i IN range(0, 10, 2)],i $$) AS (... ^ +HINT: variable i does not exist within scope of usage SELECT * FROM cypher('list_comprehension', $$ RETURN [i IN range(0, 10, 2) WHERE i>5 | i^2], i $$) AS (result agtype, i agtype); ERROR: could not find rte for i LINE 1: ...$$ RETURN [i IN range(0, 10, 2) WHERE i>5 | i^2], i $$) AS (... ^ +HINT: variable i does not exist within scope of usage -- Invalid list comprehension SELECT * FROM cypher('list_comprehension', $$ RETURN [1 IN range(0, 10, 2) WHERE 2>5] $$) AS (result agtype); ERROR: Syntax error at or near IN diff --git a/regress/expected/scan.out b/regress/expected/scan.out index d8105a053..46d5676d0 100644 --- a/regress/expected/scan.out +++ b/regress/expected/scan.out @@ -437,36 +437,42 @@ $$) AS t(id text); ERROR: could not find rte for _$09A_z LINE 2: RETURN _$09A_z ^ +HINT: variable _$09A_z does not exist within scope of usage SELECT * FROM cypher('scan', $$ RETURN A $$) AS t(id text); ERROR: could not find rte for A LINE 2: RETURN A ^ +HINT: variable A does not exist within scope of usage SELECT * FROM cypher('scan', $$ RETURN z $$) AS t(id text); ERROR: could not find rte for z LINE 2: RETURN z ^ +HINT: variable z does not exist within scope of usage SELECT * FROM cypher('scan', $$ RETURN `$` $$) AS t(id text); ERROR: could not find rte for $ LINE 2: RETURN `$` ^ +HINT: variable $ does not exist within scope of usage SELECT * FROM cypher('scan', $$ RETURN `0` $$) AS t(id text); ERROR: could not find rte for 0 LINE 2: RETURN `0` ^ +HINT: variable 0 does not exist within scope of usage SELECT * FROM cypher('scan', $$ RETURN ```` $$) AS t(id text); ERROR: could not find rte for ` LINE 2: RETURN ```` ^ +HINT: variable ` does not exist within scope of usage -- zero-length quoted identifier SELECT * FROM cypher('scan', $$ RETURN `` diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index 28936c404..f3f55d3be 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql @@ -3634,9 +3634,36 @@ SELECT * FROM cypher('issue_1988', $$ SELECT agtype_access_operator(agtype_in('[null, null]')); SELECT agtype_hash_cmp(agtype_in('[null, null, null, null, null]')); +-- +-- Issue 2263: AGE returns incorrect error message for EXISTS subquery outer variable reference +-- +-- NOTE: There isn't really anything incorrect about the message. However, +-- it could be more clear. +-- +SELECT * FROM create_graph('issue_2263'); +SELECT * FROM cypher('issue_2263', $$ + CREATE a=()-[:T]->(), p=({k:exists{return a}})-[:T]->() + RETURN 1 +$$) AS (one agtype); +SELECT * FROM cypher('issue_2263', $$ + CREATE p0=(n0), (n1{k:EXISTS{WITH p0}}) + RETURN 1 +$$) AS (one agtype); +SELECT * FROM cypher('issue_2263', $$ + CREATE ()-[r4 :T6]->(), ({k2:COUNT{WITH r4.k AS a3 UNWIND [] AS a4 WITH DISTINCT NULL AS a5}}) + RETURN 1 +$$) AS (one agtype); +SELECT * FROM cypher('issue_2263', $$ + CREATE (x), ({a1:EXISTS { RETURN COUNT(0) AS a2, keys(x) AS a4 }}) +$$) AS (out agtype); +SELECT * FROM cypher('issue_2263', $$ + CREATE x = (), ({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL) END RETURN 0 } }) +$$) AS (out agtype); + -- -- Cleanup -- +SELECT * FROM drop_graph('issue_2263', true); SELECT * FROM drop_graph('issue_1988', true); SELECT * FROM drop_graph('issue_1953', true); SELECT * FROM drop_graph('expanded_map', true); diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index df96b3cdf..fc10428ae 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -6310,6 +6310,7 @@ transform_cypher_clause_as_subquery(cypher_parsestate *cpstate, pstate->p_expr_kind == EXPR_KIND_OTHER || pstate->p_expr_kind == EXPR_KIND_WHERE || pstate->p_expr_kind == EXPR_KIND_SELECT_TARGET || + pstate->p_expr_kind == EXPR_KIND_INSERT_TARGET || pstate->p_expr_kind == EXPR_KIND_FROM_SUBSELECT); /* diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index 993957c83..5f4de86b9 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -423,9 +423,11 @@ static Node *transform_ColumnRef(cypher_parsestate *cpstate, ColumnRef *cref) else { ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("could not find rte for %s", colname), - parser_errposition(pstate, cref->location))); + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("could not find rte for %s", colname), + errhint("variable %s does not exist within scope of usage", + colname), + parser_errposition(pstate, cref->location))); } if (node == NULL) From ea18db01116ab48ab7130c69970c46e9175c1c9a Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Wed, 10 Dec 2025 09:08:36 -0800 Subject: [PATCH 08/20] Fix ORDER BY alias resolution with AS in Cypher queries (#2269) NOTE: This PR was partially created with AI tools and reviewed by a human. ORDER BY clauses failed when referencing column aliases from RETURN: MATCH (p:Person) RETURN p.age AS age ORDER BY age DESC ERROR: could not find rte for age Added SQL-99 compliant alias matching to find_target_list_entry() that checks if ORDER BY identifier matches a target list alias before attempting expression transformation. This enables standard SQL behavior for sorting by aliased columns with DESC/DESCENDING/ASC/ASCENDING. Updated regression tests. Added regression tests. modified: regress/expected/cypher_match.out modified: regress/expected/expr.out modified: regress/sql/expr.sql modified: src/backend/parser/cypher_clause.c --- regress/expected/cypher_match.out | 24 ++++---- regress/expected/expr.out | 88 ++++++++++++++++++++++++++++++ regress/sql/expr.sql | 30 ++++++++++ src/backend/parser/cypher_clause.c | 26 +++++++++ 4 files changed, 156 insertions(+), 12 deletions(-) diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index e83ba3b93..ed2b3da08 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -1655,10 +1655,10 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (u agtype, m agtype, l agtype); u | m | l ------------+---------------+------------ - "someone" | "opt_match_e" | "somebody" - "somebody" | "opt_match_e" | "someone" "anybody" | "opt_match_e" | "nobody" "nobody" | "opt_match_e" | "anybody" + "somebody" | "opt_match_e" | "someone" + "someone" | "opt_match_e" | "somebody" (4 rows) SELECT * FROM cypher('cypher_match', $$ @@ -1670,8 +1670,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (n agtype, r agtype, p agtype, m agtype, s agtype, q agtype); n | r | p | m | s | q -----------+---------------+------------+-----------+---------------+------------ - "someone" | "opt_match_e" | "somebody" | "anybody" | "opt_match_e" | "nobody" "anybody" | "opt_match_e" | "nobody" | "someone" | "opt_match_e" | "somebody" + "someone" | "opt_match_e" | "somebody" | "anybody" | "opt_match_e" | "nobody" (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -1684,18 +1684,18 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (n agtype, r agtype, p agtype, m agtype, s agtype, q agtype); n | r | p | m | s | q ------------+---------------+------------+------------+---------------+------------ - "someone" | "opt_match_e" | "somebody" | "anybody" | "opt_match_e" | "nobody" - "someone" | | | "somebody" | | - "someone" | | | "nobody" | | - "somebody" | | | "someone" | | - "somebody" | | | "anybody" | | - "somebody" | | | "nobody" | | "anybody" | "opt_match_e" | "nobody" | "someone" | "opt_match_e" | "somebody" - "anybody" | | | "somebody" | | "anybody" | | | "nobody" | | - "nobody" | | | "someone" | | - "nobody" | | | "somebody" | | + "anybody" | | | "somebody" | | "nobody" | | | "anybody" | | + "nobody" | | | "somebody" | | + "nobody" | | | "someone" | | + "somebody" | | | "anybody" | | + "somebody" | | | "nobody" | | + "somebody" | | | "someone" | | + "someone" | "opt_match_e" | "somebody" | "anybody" | "opt_match_e" | "nobody" + "someone" | | | "nobody" | | + "someone" | | | "somebody" | | (12 rows) -- Tests to catch match following optional match logic diff --git a/regress/expected/expr.out b/regress/expected/expr.out index 523561fa5..33498b06b 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -6949,6 +6949,94 @@ $$) AS (i agtype); {"key": "value"} (9 rows) +-- +-- Test ORDER BY with AS +-- +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'John', age: 38}) $$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Jill', age: 23}) $$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Ion', age: 34}) $$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Mary', age: 57}) $$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Jerry', age: 34}) $$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY name +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Ion" | 34 + "Jerry" | 34 + "Jill" | 23 + "John" | 38 + "Mary" | 57 +(5 rows) + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY name ASC +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Ion" | 34 + "Jerry" | 34 + "Jill" | 23 + "John" | 38 + "Mary" | 57 +(5 rows) + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY name DESC +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Mary" | 57 + "John" | 38 + "Jill" | 23 + "Jerry" | 34 + "Ion" | 34 +(5 rows) + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY age ASC, name DESCENDING +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Jill" | 23 + "Jerry" | 34 + "Ion" | 34 + "John" | 38 + "Mary" | 57 +(5 rows) + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY age DESC, name ASCENDING +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Mary" | 57 + "John" | 38 + "Ion" | 34 + "Jerry" | 34 + "Jill" | 23 +(5 rows) + --CASE SELECT create_graph('case_statement'); NOTICE: graph "case_statement" has been created diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index f3f55d3be..40c2116df 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql @@ -2823,6 +2823,7 @@ SELECT * FROM cypher('order_by', $$CREATE ({i: false})$$) AS (result agtype); SELECT * FROM cypher('order_by', $$CREATE ({i: {key: 'value'}})$$) AS (result agtype); SELECT * FROM cypher('order_by', $$CREATE ({i: [1]})$$) AS (result agtype); + SELECT * FROM cypher('order_by', $$ MATCH (u) RETURN u.i @@ -2835,6 +2836,35 @@ SELECT * FROM cypher('order_by', $$ ORDER BY u.i DESC $$) AS (i agtype); +-- +-- Test ORDER BY with AS +-- +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'John', age: 38}) $$) AS (result agtype); +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Jill', age: 23}) $$) AS (result agtype); +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Ion', age: 34}) $$) AS (result agtype); +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Mary', age: 57}) $$) AS (result agtype); +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Jerry', age: 34}) $$) AS (result agtype); + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY name +$$) AS (name agtype, age agtype); + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY name ASC +$$) AS (name agtype, age agtype); + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY name DESC +$$) AS (name agtype, age agtype); + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY age ASC, name DESCENDING +$$) AS (name agtype, age agtype); + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY age DESC, name ASCENDING +$$) AS (name agtype, age agtype); + --CASE SELECT create_graph('case_statement'); SELECT * FROM cypher('case_statement', $$CREATE ({id: 1, i: 1, j: null})-[:connected_to {id: 1, k:0}]->({id: 2, i: 'a', j: 'b'})$$) AS (result agtype); diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index fc10428ae..aac6cfb6a 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -2291,6 +2291,32 @@ static TargetEntry *find_target_list_entry(cypher_parsestate *cpstate, ListCell *lt; TargetEntry *te; + /* + * If the ORDER BY item is a simple identifier, check if it matches + * an alias in the target list. This implements SQL99-compliant + * alias matching for ORDER BY clauses. + */ + if (IsA(node, ColumnRef)) + { + ColumnRef *cref = (ColumnRef *)node; + + if (list_length(cref->fields) == 1) + { + char *name = strVal(linitial(cref->fields)); + + /* Try to match an alias in the target list */ + foreach (lt, *target_list) + { + te = lfirst(lt); + + if (te->resname != NULL && strcmp(te->resname, name) == 0) + { + return te; + } + } + } + } + expr = transform_cypher_expr(cpstate, node, expr_kind); foreach (lt, *target_list) From 62bc7127e55c5be3752b850e7c68028e31658688 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Thu, 11 Dec 2025 07:32:13 -0800 Subject: [PATCH 09/20] Update grammar file for maintainability (#2270) Consolidated duplicate code, added helper functions, and reviewed the grammar file for issues. NOTE: I used an AI tool to review and cleanup the grammar file. I have reviewed all of the work it did. Improvements: 1. Added KEYWORD_STRDUP macro to eliminate hardcoded string lengths 2. Consolidated EXPLAIN statement handling into make_explain_stmt helper 3. Extracted WITH clause validation into validate_return_item_aliases helper 4. Created make_default_return_node helper for subquery return-less logic Benefits: - Reduced code duplication by ~150 lines - Improved maintainability with helper functions - Eliminated manual string length calculations (error-prone) All 29 existing regression tests pass modified: src/backend/parser/cypher_gram.y --- src/backend/parser/cypher_gram.y | 303 ++++++++++++++----------------- 1 file changed, 140 insertions(+), 163 deletions(-) diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index 1a3d2167c..8fcad1663 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -41,6 +41,9 @@ #define YYMALLOC palloc #define YYFREE pfree + +/* Helper macro for keyword string duplication */ +#define KEYWORD_STRDUP(kw) pnstrdup((kw), strlen(kw)) %} %locations @@ -272,6 +275,11 @@ static Node *build_list_comprehension_node(Node *var, Node *expr, Node *where, Node *mapping_expr, int location); +/* helper functions */ +static ExplainStmt *make_explain_stmt(List *options); +static void validate_return_item_aliases(List *items, ag_scanner_t scanner); +static cypher_return *make_default_return_node(int location); + %} %% @@ -296,81 +304,59 @@ stmt: * Throw syntax error in this case. */ if (yychar != YYEOF) + { yyerror(&yylloc, scanner, extra, "syntax error"); + } extra->result = $1; extra->extra = NULL; } | EXPLAIN cypher_stmt semicolon_opt { - ExplainStmt *estmt = NULL; - if (yychar != YYEOF) + { yyerror(&yylloc, scanner, extra, "syntax error"); - + } extra->result = $2; - - estmt = makeNode(ExplainStmt); - estmt->query = NULL; - estmt->options = NIL; - extra->extra = (Node *)estmt; + extra->extra = (Node *)make_explain_stmt(NIL); } | EXPLAIN VERBOSE cypher_stmt semicolon_opt { - ExplainStmt *estmt = NULL; - if (yychar != YYEOF) + { yyerror(&yylloc, scanner, extra, "syntax error"); - + } extra->result = $3; - - estmt = makeNode(ExplainStmt); - estmt->query = NULL; - estmt->options = list_make1(makeDefElem("verbose", NULL, @2));; - extra->extra = (Node *)estmt; + extra->extra = (Node *)make_explain_stmt( + list_make1(makeDefElem("verbose", NULL, @2))); } | EXPLAIN ANALYZE cypher_stmt semicolon_opt { - ExplainStmt *estmt = NULL; - if (yychar != YYEOF) + { yyerror(&yylloc, scanner, extra, "syntax error"); - + } extra->result = $3; - - estmt = makeNode(ExplainStmt); - estmt->query = NULL; - estmt->options = list_make1(makeDefElem("analyze", NULL, @2));; - extra->extra = (Node *)estmt; + extra->extra = (Node *)make_explain_stmt( + list_make1(makeDefElem("analyze", NULL, @2))); } | EXPLAIN ANALYZE VERBOSE cypher_stmt semicolon_opt { - ExplainStmt *estmt = NULL; - if (yychar != YYEOF) yyerror(&yylloc, scanner, extra, "syntax error"); - extra->result = $4; - - estmt = makeNode(ExplainStmt); - estmt->query = NULL; - estmt->options = list_make2(makeDefElem("analyze", NULL, @2), - makeDefElem("verbose", NULL, @3));; - extra->extra = (Node *)estmt; + extra->extra = (Node *)make_explain_stmt( + list_make2(makeDefElem("analyze", NULL, @2), + makeDefElem("verbose", NULL, @3))); } | EXPLAIN '(' utility_option_list ')' cypher_stmt semicolon_opt { - ExplainStmt *estmt = NULL; - if (yychar != YYEOF) + { yyerror(&yylloc, scanner, extra, "syntax error"); - + } extra->result = $5; - - estmt = makeNode(ExplainStmt); - estmt->query = NULL; - estmt->options = $3; - extra->extra = (Node *)estmt; + extra->extra = (Node *)make_explain_stmt($3); } ; @@ -668,57 +654,20 @@ single_subquery: single_subquery_no_return: subquery_part_init reading_clause_list { - ColumnRef *cr; - ResTarget *rt; cypher_return *n; /* * since subqueries allow return-less clauses, we add a * return node manually to reflect that syntax */ - cr = makeNode(ColumnRef); - cr->fields = list_make1(makeNode(A_Star)); - cr->location = @1; - - rt = makeNode(ResTarget); - rt->name = NULL; - rt->indirection = NIL; - rt->val = (Node *)cr; - rt->location = @1; - - n = make_ag_node(cypher_return); - n->distinct = false; - n->items = list_make1((Node *)rt); - n->order_by = NULL; - n->skip = NULL; - n->limit = NULL; - + n = make_default_return_node(@1); $$ = list_concat($1, lappend($2, n)); - } - | subquery_pattern + | subquery_pattern { - ColumnRef *cr; - ResTarget *rt; cypher_return *n; - cr = makeNode(ColumnRef); - cr->fields = list_make1(makeNode(A_Star)); - cr->location = @1; - - rt = makeNode(ResTarget); - rt->name = NULL; - rt->indirection = NIL; - rt->val = (Node *)cr; - rt->location = @1; - - n = make_ag_node(cypher_return); - n->distinct = false; - n->items = list_make1((Node *)rt); - n->order_by = NULL; - n->skip = NULL; - n->limit = NULL; - + n = make_default_return_node(@1); $$ = lappend(list_make1($1), n); } ; @@ -974,24 +923,10 @@ limit_opt: with: WITH DISTINCT return_item_list order_by_opt skip_opt limit_opt where_opt { - ListCell *li; cypher_with *n; /* check expressions are aliased */ - foreach(li, $3) - { - ResTarget *item = lfirst(li); - - /* variable does not have to be aliased */ - if (IsA(item->val, ColumnRef) || item->name) - continue; - - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("expression item must be aliased"), - errhint("Items can be aliased by using AS."), - ag_scanner_errposition(item->location, scanner))); - } + validate_return_item_aliases($3, scanner); n = make_ag_node(cypher_with); n->distinct = true; @@ -1003,27 +938,12 @@ with: $$ = (Node *)n; } - | WITH return_item_list order_by_opt skip_opt limit_opt - where_opt + | WITH return_item_list order_by_opt skip_opt limit_opt where_opt { - ListCell *li; cypher_with *n; /* check expressions are aliased */ - foreach (li, $2) - { - ResTarget *item = lfirst(li); - - /* variable does not have to be aliased */ - if (IsA(item->val, ColumnRef) || item->name) - continue; - - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("expression item must be aliased"), - errhint("Items can be aliased by using AS."), - ag_scanner_errposition(item->location, scanner))); - } + validate_return_item_aliases($2, scanner); n = make_ag_node(cypher_with); n->distinct = false; @@ -2465,58 +2385,58 @@ qual_op: */ safe_keywords: - ALL { $$ = pnstrdup($1, 3); } - | ANALYZE { $$ = pnstrdup($1, 7); } - | AND { $$ = pnstrdup($1, 3); } - | AS { $$ = pnstrdup($1, 2); } - | ASC { $$ = pnstrdup($1, 3); } - | ASCENDING { $$ = pnstrdup($1, 9); } - | BY { $$ = pnstrdup($1, 2); } - | CALL { $$ = pnstrdup($1, 4); } - | CASE { $$ = pnstrdup($1, 4); } - | COALESCE { $$ = pnstrdup($1, 8); } - | CONTAINS { $$ = pnstrdup($1, 8); } - | COUNT { $$ = pnstrdup($1 ,5); } - | CREATE { $$ = pnstrdup($1, 6); } - | DELETE { $$ = pnstrdup($1, 6); } - | DESC { $$ = pnstrdup($1, 4); } - | DESCENDING { $$ = pnstrdup($1, 10); } - | DETACH { $$ = pnstrdup($1, 6); } - | DISTINCT { $$ = pnstrdup($1, 8); } - | ELSE { $$ = pnstrdup($1, 4); } - | ENDS { $$ = pnstrdup($1, 4); } - | EXISTS { $$ = pnstrdup($1, 6); } - | EXPLAIN { $$ = pnstrdup($1, 7); } - | IN { $$ = pnstrdup($1, 2); } - | IS { $$ = pnstrdup($1, 2); } - | LIMIT { $$ = pnstrdup($1, 6); } - | MATCH { $$ = pnstrdup($1, 6); } - | MERGE { $$ = pnstrdup($1, 6); } - | NOT { $$ = pnstrdup($1, 3); } - | OPERATOR { $$ = pnstrdup($1, 8); } - | OPTIONAL { $$ = pnstrdup($1, 8); } - | OR { $$ = pnstrdup($1, 2); } - | ORDER { $$ = pnstrdup($1, 5); } - | REMOVE { $$ = pnstrdup($1, 6); } - | RETURN { $$ = pnstrdup($1, 6); } - | SET { $$ = pnstrdup($1, 3); } - | SKIP { $$ = pnstrdup($1, 4); } - | STARTS { $$ = pnstrdup($1, 6); } - | THEN { $$ = pnstrdup($1, 4); } - | UNION { $$ = pnstrdup($1, 5); } - | WHEN { $$ = pnstrdup($1, 4); } - | VERBOSE { $$ = pnstrdup($1, 7); } - | WHERE { $$ = pnstrdup($1, 5); } - | WITH { $$ = pnstrdup($1, 4); } - | XOR { $$ = pnstrdup($1, 3); } - | YIELD { $$ = pnstrdup($1, 5); } + ALL { $$ = KEYWORD_STRDUP($1); } + | ANALYZE { $$ = KEYWORD_STRDUP($1); } + | AND { $$ = KEYWORD_STRDUP($1); } + | AS { $$ = KEYWORD_STRDUP($1); } + | ASC { $$ = KEYWORD_STRDUP($1); } + | ASCENDING { $$ = KEYWORD_STRDUP($1); } + | BY { $$ = KEYWORD_STRDUP($1); } + | CALL { $$ = KEYWORD_STRDUP($1); } + | CASE { $$ = KEYWORD_STRDUP($1); } + | COALESCE { $$ = KEYWORD_STRDUP($1); } + | CONTAINS { $$ = KEYWORD_STRDUP($1); } + | COUNT { $$ = KEYWORD_STRDUP($1); } + | CREATE { $$ = KEYWORD_STRDUP($1); } + | DELETE { $$ = KEYWORD_STRDUP($1); } + | DESC { $$ = KEYWORD_STRDUP($1); } + | DESCENDING { $$ = KEYWORD_STRDUP($1); } + | DETACH { $$ = KEYWORD_STRDUP($1); } + | DISTINCT { $$ = KEYWORD_STRDUP($1); } + | ELSE { $$ = KEYWORD_STRDUP($1); } + | ENDS { $$ = KEYWORD_STRDUP($1); } + | EXISTS { $$ = KEYWORD_STRDUP($1); } + | EXPLAIN { $$ = KEYWORD_STRDUP($1); } + | IN { $$ = KEYWORD_STRDUP($1); } + | IS { $$ = KEYWORD_STRDUP($1); } + | LIMIT { $$ = KEYWORD_STRDUP($1); } + | MATCH { $$ = KEYWORD_STRDUP($1); } + | MERGE { $$ = KEYWORD_STRDUP($1); } + | NOT { $$ = KEYWORD_STRDUP($1); } + | OPERATOR { $$ = KEYWORD_STRDUP($1); } + | OPTIONAL { $$ = KEYWORD_STRDUP($1); } + | OR { $$ = KEYWORD_STRDUP($1); } + | ORDER { $$ = KEYWORD_STRDUP($1); } + | REMOVE { $$ = KEYWORD_STRDUP($1); } + | RETURN { $$ = KEYWORD_STRDUP($1); } + | SET { $$ = KEYWORD_STRDUP($1); } + | SKIP { $$ = KEYWORD_STRDUP($1); } + | STARTS { $$ = KEYWORD_STRDUP($1); } + | THEN { $$ = KEYWORD_STRDUP($1); } + | UNION { $$ = KEYWORD_STRDUP($1); } + | WHEN { $$ = KEYWORD_STRDUP($1); } + | VERBOSE { $$ = KEYWORD_STRDUP($1); } + | WHERE { $$ = KEYWORD_STRDUP($1); } + | WITH { $$ = KEYWORD_STRDUP($1); } + | XOR { $$ = KEYWORD_STRDUP($1); } + | YIELD { $$ = KEYWORD_STRDUP($1); } ; conflicted_keywords: - END_P { $$ = pnstrdup($1, 5); } - | FALSE_P { $$ = pnstrdup($1, 7); } - | NULL_P { $$ = pnstrdup($1, 6); } - | TRUE_P { $$ = pnstrdup($1, 6); } + END_P { $$ = KEYWORD_STRDUP($1); } + | FALSE_P { $$ = KEYWORD_STRDUP($1); } + | NULL_P { $$ = KEYWORD_STRDUP($1); } + | TRUE_P { $$ = KEYWORD_STRDUP($1); } ; %% @@ -3386,7 +3306,7 @@ static Node *build_list_comprehension_node(Node *var, Node *expr, /* * Build an ARRAY sublink and attach list_comp as sub-select, - * it will be transformed in to query tree by us and reattached for + * it will be transformed in to query tree by us and reattached for * pg to process. */ sub = makeNode(SubLink); @@ -3398,3 +3318,60 @@ static Node *build_list_comprehension_node(Node *var, Node *expr, return (Node *) node_to_agtype((Node *)sub, "agtype[]", location); } + +/* Helper function to create an ExplainStmt node */ +static ExplainStmt *make_explain_stmt(List *options) +{ + ExplainStmt *estmt = makeNode(ExplainStmt); + estmt->query = NULL; + estmt->options = options; + return estmt; +} + +/* Helper function to validate that return items are properly aliased */ +static void validate_return_item_aliases(List *items, ag_scanner_t scanner) +{ + ListCell *li; + + foreach(li, items) + { + ResTarget *item = lfirst(li); + + /* variable does not have to be aliased */ + if (IsA(item->val, ColumnRef) || item->name) + continue; + + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("expression item must be aliased"), + errhint("Items can be aliased by using AS."), + ag_scanner_errposition(item->location, scanner))); + } +} + +/* Helper function to create a default return node (RETURN *) */ +static cypher_return *make_default_return_node(int location) +{ + ColumnRef *cr; + ResTarget *rt; + cypher_return *n; + + cr = makeNode(ColumnRef); + cr->fields = list_make1(makeNode(A_Star)); + cr->location = location; + + rt = makeNode(ResTarget); + rt->name = NULL; + rt->indirection = NIL; + rt->val = (Node *)cr; + rt->location = location; + + n = make_ag_node(cypher_return); + n->distinct = false; + n->items = list_make1((Node *)rt); + n->order_by = NULL; + n->skip = NULL; + n->limit = NULL; + + return n; +} From 6d2cb387e51f0e57272139e81930468ed443fdfb Mon Sep 17 00:00:00 2001 From: jsell-rh Date: Thu, 11 Dec 2025 10:56:37 -0500 Subject: [PATCH 10/20] Convert string to raw string to remove invalid escape sequence warning (#2267) - Changed '\s' to r'\s' --- drivers/python/age/age.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/python/age/age.py b/drivers/python/age/age.py index 817cc6e5a..b1aa82158 100644 --- a/drivers/python/age/age.py +++ b/drivers/python/age/age.py @@ -26,7 +26,7 @@ _EXCEPTION_NoConnection = NoConnection() _EXCEPTION_GraphNotSet = GraphNotSet() -WHITESPACE = re.compile('\s') +WHITESPACE = re.compile(r'\s') class AgeDumper(psycopg.adapt.Dumper): @@ -233,3 +233,4 @@ def cypher(self, cursor:psycopg.cursor, cypherStmt:str, cols:list=None, params:t # def queryCypher(self, cypherStmt:str, columns:list=None , params:tuple=None) -> psycopg.cursor : # return queryCypher(self.connection, self.graphName, cypherStmt, columns, params) + From ea3c1cb44ec071015f7faea2f2dcc2fa8b8e7981 Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Thu, 11 Dec 2025 21:51:12 +0500 Subject: [PATCH 11/20] Migrate python driver configuration to pyproject.toml (#2272) - Add pyproject.toml with package configuration - Simplify setup.py to minimal backward-compatible wrapper. - Updated CI workflow and .gitignore. - Resolves warning about using setup.py directly. --- .github/workflows/python-driver.yaml | 4 +-- .gitignore | 2 ++ drivers/python/README.md | 2 +- drivers/python/pyproject.toml | 48 ++++++++++++++++++++++++++++ drivers/python/setup.py | 28 +++------------- 5 files changed, 58 insertions(+), 26 deletions(-) create mode 100644 drivers/python/pyproject.toml diff --git a/.github/workflows/python-driver.yaml b/.github/workflows/python-driver.yaml index f0b4cffed..abd89915e 100644 --- a/.github/workflows/python-driver.yaml +++ b/.github/workflows/python-driver.yaml @@ -24,7 +24,7 @@ jobs: - name: Set up python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.12' - name: Install pre-requisites run: | @@ -33,7 +33,7 @@ jobs: - name: Build run: | - python setup.py install + pip install . - name: Test run: | diff --git a/.gitignore b/.gitignore index a8e809dda..03923b03e 100644 --- a/.gitignore +++ b/.gitignore @@ -11,5 +11,7 @@ age--*.*.*.sql !age--*--*sql __pycache__ **/__pycache__ +**/.venv +**/apache_age_python.egg-info drivers/python/build diff --git a/drivers/python/README.md b/drivers/python/README.md index c895fe76d..c958de479 100644 --- a/drivers/python/README.md +++ b/drivers/python/README.md @@ -62,7 +62,7 @@ python -m unittest -v test_agtypes.py ### Build from source ``` -python setup.py install +pip install . ``` ### For more information about [Apache AGE](https://age.apache.org/) diff --git a/drivers/python/pyproject.toml b/drivers/python/pyproject.toml new file mode 100644 index 000000000..18112381c --- /dev/null +++ b/drivers/python/pyproject.toml @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "apache-age-python" +version = "0.0.7" +description = "Python driver support for Apache AGE" +readme = "README.md" +requires-python = ">=3.9" +license = "Apache-2.0" +keywords = ["Graph Database", "Apache AGE", "PostgreSQL"] +authors = [ + {name = "Ikchan Kwon, Apache AGE", email = "dev-subscribe@age.apache.org"} +] +classifiers = [ + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", +] +dependencies = [ + "psycopg", + "antlr4-python3-runtime==4.11.1", +] + +[project.urls] +Homepage = "https://github.com/apache/age/tree/master/drivers/python" +Download = "https://github.com/apache/age/releases" + +[tool.setuptools] +packages = ["age", "age.gen", "age.networkx"] diff --git a/drivers/python/setup.py b/drivers/python/setup.py index 1da49d9cb..d0eed26be 100644 --- a/drivers/python/setup.py +++ b/drivers/python/setup.py @@ -13,28 +13,10 @@ # specific language governing permissions and limitations # under the License. -from setuptools import setup, find_packages -from age import VERSION +# This setup.py is maintained for backward compatibility. +# All package configuration is in pyproject.toml. For installation, +# use: pip install . -with open("README.md", "r", encoding='utf8') as fh: - long_description = fh.read() +from setuptools import setup -setup( - name = 'apache-age-python', - version = '0.0.7', - description = 'Python driver support for Apache AGE', - long_description=long_description, - long_description_content_type="text/markdown", - author = 'Ikchan Kwon, Apache AGE', - author_email = 'dev-subscribe@age.apache.org', - url = 'https://github.com/apache/age/tree/master/drivers/python', - download_url = 'https://github.com/apache/age/releases' , - license = 'Apache2.0', - install_requires = [ 'psycopg', 'antlr4-python3-runtime==4.11.1'], - packages = ['age', 'age.gen','age.networkx'], - keywords = ['Graph Database', 'Apache AGE', 'PostgreSQL'], - python_requires = '>=3.9', - classifiers = [ - 'Programming Language :: Python :: 3.9' - ] -) +setup() From 354f45193939225c3ffd3258354e4c3138c52c78 Mon Sep 17 00:00:00 2001 From: Arthur Nascimento Date: Tue, 6 Jan 2026 13:36:49 -0300 Subject: [PATCH 12/20] Makefile: fix race condition on cypher_gram_def.h (#2273) The file cypher_gram.c generates cypher_gram_def.h, which is directly necessary for cypher_parser.o and cypher_keywords.o and their respective .bc files. But that direct dependency is not reflected in the Makefile, which only had the indirect dependency of .o on .c. So on high parallel builds, the .h may not have been generated by bison yet. Additionally, the .bc files should have the same dependencies as the .o files, but those are lacking. Here is an example output where the .bc file fails to build, as it was running concurrently with the bison instance that was about to finalize cypher_gram_def.h: In file included from src/backend/parser/cypher_parser.c:24: clang-17 -Wno-ignored-attributes -fno-strict-aliasing -fwrapv -fexcess-precision=standard -Wno-unused-command-line-argument -Wno-compound-token-split-by-macro -O2 -I.//src/include -I.//src/include/parser -I. -I./ -I/usr/pgsql-17/include/server -I/usr/pgsql-17/include/internal -D_GNU_SOURCE -I/usr/include -I/usr/include/libxml2 -flto=thin -emit-llvm -c -o src/backend/parser/cypher_parser.bc src/backend/parser/cypher_parser.c .//src/include/parser/cypher_gram.h:65:10: fatal error: 'parser/cypher_gram_def.h' file not found 65 | #include "parser/cypher_gram_def.h" | ^~~~~~~~~~~~~~~~~~~~~~~~~~ 1 error generated. make: *** [/usr/pgsql-17/lib/pgxs/src/makefiles/../../src/Makefile.global:1085: src/backend/parser/cypher_parser.bc] Error 1 make: *** Waiting for unfinished jobs.... gcc -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wimplicit-fallthrough=3 -Wshadow=compatible-local -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -Wno-format-truncation -O2 -g -fmessage-length=0 -D_FORTIFY_SOURCE=2 -fstack-protector -funwind-tables -fasynchronous-unwind-tables -fPIC -fvisibility=hidden -I.//src/include -I.//src/include/parser -I. -I./ -I/usr/pgsql-17/include/server -I/usr/pgsql-17/include/internal -D_GNU_SOURCE -I/usr/include -I/usr/include/libxml2 -c -o src/backend/catalog/ag_label.o src/backend/catalog/ag_label.c /usr/bin/bison -Wno-deprecated --defines=src/include/parser/cypher_gram_def.h -o src/backend/parser/cypher_gram.c src/backend/parser/cypher_gram.y Previously, cypher_parser.o was missing the dependency, so it could start before cypher_gram_def.h was available: Considering target file 'src/backend/parser/cypher_parser.o'. File 'src/backend/parser/cypher_parser.o' does not exist. Considering target file 'src/backend/parser/cypher_parser.c'. File 'src/backend/parser/cypher_parser.c' was considered already. Considering target file 'src/backend/parser/cypher_gram.c'. File 'src/backend/parser/cypher_gram.c' was considered already. Finished prerequisites of target file 'src/backend/parser/cypher_parser.o'. Must remake target 'src/backend/parser/cypher_parser.o'. As well as cypher_parser.bc, missing the dependency on cypher_gram_def.h: Considering target file 'src/backend/parser/cypher_parser.bc'. File 'src/backend/parser/cypher_parser.bc' does not exist. Considering target file 'src/backend/parser/cypher_parser.c'. File 'src/backend/parser/cypher_parser.c' was considered already. Finished prerequisites of target file 'src/backend/parser/cypher_parser.bc'. Must remake target 'src/backend/parser/cypher_parser.bc'. Now cypher_parser.o correctly depends on cypher_gram_def.h: Considering target file 'src/backend/parser/cypher_parser.o'. File 'src/backend/parser/cypher_parser.o' does not exist. Considering target file 'src/backend/parser/cypher_parser.c'. File 'src/backend/parser/cypher_parser.c' was considered already. Considering target file 'src/backend/parser/cypher_gram.c'. File 'src/backend/parser/cypher_gram.c' was considered already. Considering target file 'src/include/parser/cypher_gram_def.h'. File 'src/include/parser/cypher_gram_def.h' was considered already. Finished prerequisites of target file 'src/backend/parser/cypher_parser.o'. Must remake target 'src/backend/parser/cypher_parser.o'. And cypher_parser.bc correctly depends on cypher_gram_def.h as well: Considering target file 'src/backend/parser/cypher_parser.bc'. File 'src/backend/parser/cypher_parser.bc' does not exist. Considering target file 'src/backend/parser/cypher_parser.c'. File 'src/backend/parser/cypher_parser.c' was considered already. Considering target file 'src/backend/parser/cypher_gram.c'. File 'src/backend/parser/cypher_gram.c' was considered already. Considering target file 'src/include/parser/cypher_gram_def.h'. File 'src/include/parser/cypher_gram_def.h' was considered already. Finished prerequisites of target file 'src/backend/parser/cypher_parser.bc'. Must remake target 'src/backend/parser/cypher_parser.bc'. --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3e73f3e68..0392d8b85 100644 --- a/Makefile +++ b/Makefile @@ -147,8 +147,10 @@ src/include/parser/cypher_gram_def.h: src/backend/parser/cypher_gram.c src/backend/parser/cypher_gram.c: BISONFLAGS += --defines=src/include/parser/cypher_gram_def.h -src/backend/parser/cypher_parser.o: src/backend/parser/cypher_gram.c -src/backend/parser/cypher_keywords.o: src/backend/parser/cypher_gram.c +src/backend/parser/cypher_parser.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h +src/backend/parser/cypher_parser.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h +src/backend/parser/cypher_keywords.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h +src/backend/parser/cypher_keywords.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h $(age_sql): @cat $(SQLS) > $@ From 6db9d9171ace541bd6b8d75771aefcbd92b6c807 Mon Sep 17 00:00:00 2001 From: M15terHyde <59905806+M15terHyde@users.noreply.github.com> Date: Tue, 6 Jan 2026 12:52:33 -0600 Subject: [PATCH 13/20] Revise README for Python driver updates (#2298) Updated README to from psycopg2 to psycopg3 (psycopg) Resolved Conflicts: drivers/python/README.md --- drivers/python/README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/python/README.md b/drivers/python/README.md index c958de479..94d71a0cc 100644 --- a/drivers/python/README.md +++ b/drivers/python/README.md @@ -28,11 +28,11 @@ AGType parser and driver support for [Apache AGE](https://age.apache.org/), grap ### Features * Unmarshal AGE result data(AGType) to Vertex, Edge, Path -* Cypher query support for Psycopg2 PostgreSQL driver (enables to use cypher queries directly) +* Cypher query support for Psycopg3 PostgreSQL driver (enables to use cypher queries directly) ### Prerequisites * over Python 3.9 -* This module runs on [psycopg2](https://www.psycopg.org/) and [antlr4-python3](https://pypi.org/project/antlr4-python3-runtime/) +* This module runs on [psycopg3](https://www.psycopg.org/) and [antlr4-python3](https://pypi.org/project/antlr4-python3-runtime/) ``` sudo apt-get update sudo apt-get install python3-dev libpq-dev @@ -80,7 +80,7 @@ SET search_path = ag_catalog, "$user", public; ``` ### Usage -* If you are familiar with Psycopg2 driver : Go to [Jupyter Notebook : Basic Sample](samples/apache-age-basic.ipynb) +* If you are not familiar with Psycopg driver : Go to [Jupyter Notebook : Basic Sample](samples/apache-age-basic.ipynb) * Simpler way to access Apache AGE [AGE Sample](samples/apache-age-note.ipynb) in Samples. * Agtype converting samples: [Agtype Sample](samples/apache-age-agtypes.ipynb) in Samples. @@ -119,7 +119,7 @@ Here the following value required Insert From networkx directed graph into an Age database. #### Parameters -- `connection` (psycopg2.connect): Connection object to the Age database. +- `connection` (psycopg.connect): Connection object to the AGE database. - `G` (networkx.DiGraph): Networkx directed graph to be converted and inserted. @@ -152,7 +152,7 @@ Converts data from a Apache AGE graph database into a Networkx directed graph. #### Parameters -- `connection` (psycopg2.connect): Connection object to the PostgreSQL database. +- `connection` (psycopg.connect): Connection object to the PostgreSQL database. - `graphName` (str): Name of the graph. - `G` (None | nx.DiGraph): Optional Networkx directed graph. If provided, the data will be added to this graph. - `query` (str | None): Optional Cypher query to retrieve data from the database. @@ -167,3 +167,4 @@ Converts data from a Apache AGE graph database into a Networkx directed graph. # Call the function to convert data into a Networkx graph graph = age_to_networkx(connection, graphName="MyGraph" ) ``` + From 3baaaaae7da17bfed15f525275e5c065e99b8e51 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Fri, 9 Jan 2026 12:27:47 -0800 Subject: [PATCH 14/20] Fix Issue 2289: handle empty list in IN expression (#2294) NOTE: This PR was created with AI tools and a human. When evaluating 'x IN []' with an empty list, the transform_AEXPR_IN function would return NULL because no expressions were processed. This caused a 'cache lookup failed for type 0' error downstream. This fix adds an early check for the empty list case: - 'x IN []' returns false (nothing can be in an empty list) Additional NOTE: Cypher does not have 'NOT IN' syntax. To check if a value is NOT in a list, use 'NOT (x IN list)'. The NOT operator will invert the false from an empty list to true as expected. The fix returns a boolean constant directly, avoiding the NULL result that caused the type lookup failure. Added regression tests. modified: regress/expected/expr.out modified: regress/sql/expr.sql modified: src/backend/parser/cypher_expr.c --- regress/expected/expr.out | 72 ++++++++++++++++++++++++++++++++ regress/sql/expr.sql | 23 ++++++++++ src/backend/parser/cypher_expr.c | 30 ++++++++++++- 3 files changed, 123 insertions(+), 2 deletions(-) diff --git a/regress/expected/expr.out b/regress/expected/expr.out index 33498b06b..052caf777 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -319,6 +319,50 @@ $$RETURN 1 IN [[null]]$$) AS r(c boolean); f (1 row) +-- empty list: x IN [] should always return false +SELECT * FROM cypher('expr', +$$RETURN 1 IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN 'a' IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN null IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN [1,2,3] IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +-- NOT (x IN []) should always return true +SELECT * FROM cypher('expr', +$$RETURN NOT (1 IN [])$$) AS r(c boolean); + c +--- + t +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN NOT ('a' IN [])$$) AS r(c boolean); + c +--- + t +(1 row) + -- should error - ERROR: object of IN must be a list SELECT * FROM cypher('expr', $$RETURN null IN 'str' $$) AS r(c boolean); @@ -9155,9 +9199,37 @@ ERROR: could not find rte for x LINE 2: ...({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL)... ^ HINT: variable x does not exist within scope of usage +-- +-- Issue 2289: 1 IN [] causes cache lookup failed for type 0 +-- +-- Additional test cases were added above to the IN operator +-- +SELECT * FROM create_graph('issue_2289'); +NOTICE: graph "issue_2289" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('issue_2289', $$ RETURN (1 IN []) AS v $$) AS (v agtype); + v +------- + false +(1 row) + -- -- Cleanup -- +SELECT * FROM drop_graph('issue_2289', true); +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table issue_2289._ag_label_vertex +drop cascades to table issue_2289._ag_label_edge +NOTICE: graph "issue_2289" has been dropped + drop_graph +------------ + +(1 row) + SELECT * FROM drop_graph('issue_2263', true); NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to table issue_2263._ag_label_vertex diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index 40c2116df..f6e369d65 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql @@ -157,6 +157,20 @@ SELECT * FROM cypher('expr', $$RETURN 1 in [[1]]$$) AS r(c boolean); SELECT * FROM cypher('expr', $$RETURN 1 IN [[null]]$$) AS r(c boolean); +-- empty list: x IN [] should always return false +SELECT * FROM cypher('expr', +$$RETURN 1 IN []$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN 'a' IN []$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN null IN []$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN [1,2,3] IN []$$) AS r(c boolean); +-- NOT (x IN []) should always return true +SELECT * FROM cypher('expr', +$$RETURN NOT (1 IN [])$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN NOT ('a' IN [])$$) AS r(c boolean); -- should error - ERROR: object of IN must be a list SELECT * FROM cypher('expr', $$RETURN null IN 'str' $$) AS r(c boolean); @@ -3690,9 +3704,18 @@ SELECT * FROM cypher('issue_2263', $$ CREATE x = (), ({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL) END RETURN 0 } }) $$) AS (out agtype); +-- +-- Issue 2289: 1 IN [] causes cache lookup failed for type 0 +-- +-- Additional test cases were added above to the IN operator +-- +SELECT * FROM create_graph('issue_2289'); +SELECT * FROM cypher('issue_2289', $$ RETURN (1 IN []) AS v $$) AS (v agtype); + -- -- Cleanup -- +SELECT * FROM drop_graph('issue_2289', true); SELECT * FROM drop_graph('issue_2263', true); SELECT * FROM drop_graph('issue_1988', true); SELECT * FROM drop_graph('issue_1953', true); diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index 5f4de86b9..fc0335def 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -600,6 +600,34 @@ static Node *transform_AEXPR_IN(cypher_parsestate *cpstate, A_Expr *a) Assert(is_ag_node(a->rexpr, cypher_list)); + rexpr = (cypher_list *)a->rexpr; + + /* + * Handle empty list case: x IN [] is always false, x NOT IN [] is always true. + * We need to check this before processing to avoid returning NULL result + * which causes "cache lookup failed for type 0" error. + */ + if (rexpr->elems == NIL || list_length((List *)rexpr->elems) == 0) + { + Datum bool_value; + Const *const_result; + + /* If operator is <> (NOT IN), result is true; otherwise (IN) result is false */ + if (strcmp(strVal(linitial(a->name)), "<>") == 0) + { + bool_value = BoolGetDatum(true); + } + else + { + bool_value = BoolGetDatum(false); + } + + const_result = makeConst(BOOLOID, -1, InvalidOid, sizeof(bool), + bool_value, false, true); + + return (Node *)const_result; + } + /* If the operator is <>, combine with AND not OR. */ if (strcmp(strVal(linitial(a->name)), "<>") == 0) { @@ -614,8 +642,6 @@ static Node *transform_AEXPR_IN(cypher_parsestate *cpstate, A_Expr *a) rexprs = rvars = rnonvars = NIL; - rexpr = (cypher_list *)a->rexpr; - foreach(l, (List *) rexpr->elems) { Node *rexpr = transform_cypher_expr_recurse(cpstate, lfirst(l)); From 732d80c5639abdf8bf21b6e9a656a78619f57d77 Mon Sep 17 00:00:00 2001 From: Jean-Paul Abbuehl Date: Mon, 12 Jan 2026 21:10:08 +0100 Subject: [PATCH 15/20] feat: Add 32-bit platform support for graphid type (#2286) * feat: Add 32-bit platform support for graphid type This enables AGE to work on 32-bit platforms including WebAssembly (WASM). Problem: - graphid is int64 (8 bytes) with PASSEDBYVALUE - On 32-bit systems, Datum is only 4 bytes - PostgreSQL rejects pass-by-value types larger than Datum Solution: - Makefile-only change (no C code modifications) - When SIZEOF_DATUM=4 is passed to make, strip PASSEDBYVALUE from the generated SQL - If not specified, normal 64-bit behavior is preserved (PASSEDBYVALUE kept) This change is backward compatible: - 64-bit systems continue using pass-by-value - 32-bit systems now work with pass-by-reference Motivation: PGlite (PostgreSQL compiled to WebAssembly) uses 32-bit pointers and requires this patch to run AGE. Tested on: - 64-bit Linux (regression tests pass) - 32-bit WebAssembly via PGlite (all operations work) Co-authored-by: abbuehlj --- Makefile | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0392d8b85..17f2ed653 100644 --- a/Makefile +++ b/Makefile @@ -138,6 +138,10 @@ PG_CONFIG ?= pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +# 32-bit platform support: pass SIZEOF_DATUM=4 to enable (e.g., make SIZEOF_DATUM=4) +# When SIZEOF_DATUM=4, PASSEDBYVALUE is stripped from graphid type for pass-by-reference. +# If not specified, normal 64-bit behavior is used (PASSEDBYVALUE preserved). + src/backend/parser/cypher_keywords.o: src/include/parser/cypher_kwlist_d.h src/include/parser/cypher_kwlist_d.h: src/include/parser/cypher_kwlist.h $(GEN_KEYWORDLIST_DEPS) @@ -152,8 +156,14 @@ src/backend/parser/cypher_parser.bc: src/backend/parser/cypher_gram.c src/includ src/backend/parser/cypher_keywords.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h src/backend/parser/cypher_keywords.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h -$(age_sql): +# Strip PASSEDBYVALUE on 32-bit (SIZEOF_DATUM=4) for graphid pass-by-reference +$(age_sql): $(SQLS) @cat $(SQLS) > $@ +ifeq ($(SIZEOF_DATUM),4) + @echo "32-bit build: removing PASSEDBYVALUE from graphid type" + @sed 's/^ PASSEDBYVALUE,$$/ -- PASSEDBYVALUE removed for 32-bit (see Makefile)/' $@ > $@.tmp && mv $@.tmp $@ + @grep -q 'PASSEDBYVALUE removed for 32-bit' $@ || { echo "Error: PASSEDBYVALUE replacement failed in $@"; exit 1; } +endif src/backend/parser/ag_scanner.c: FLEX_NO_BACKUP=yes From ee9bf06010efce6e6f8ad12294955df66bfd0aec Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Fri, 16 Jan 2026 15:12:22 -0800 Subject: [PATCH 16/20] Optimize vertex/edge field access with direct array indexing (#2302) NOTE: This PR was created using AI tools and a human. Leverage deterministic key ordering from uniqueify_agtype_object() to access vertex/edge fields in O(1) instead of O(log n) binary search. Fields are sorted by key length, giving fixed positions: - Vertex: id(0), label(1), properties(2) - Edge: id(0), label(1), end_id(2), start_id(3), properties(4) Changes: - Add field index constants and accessor macros to agtype.h - Update age_id(), age_start_id(), age_end_id(), age_label(), age_properties() to use direct field access - Add fill_agtype_value_no_copy() for read-only scalar extraction without memory allocation - Add compare_agtype_scalar_containers() fast path for scalar comparison - Update hash_agtype_value(), equals_agtype_scalar_value(), and compare_agtype_scalar_values() to use direct field access macros - Add fast path in get_one_agtype_from_variadic_args() bypassing extract_variadic_args() for single argument case - Add comprehensive regression test (30 tests) Performance impact: Improves ORDER BY, hash joins, aggregations, and Cypher functions (id, start_id, end_id, label, properties) on vertices and edges. All previous regression tests were not impacted. Additional regression test added to enhance coverage. modified: Makefile new file: regress/expected/direct_field_access.out new file: regress/sql/direct_field_access.sql modified: src/backend/utils/adt/agtype.c modified: src/backend/utils/adt/agtype_util.c modified: src/include/utils/agtype.h --- Makefile | 3 +- regress/expected/direct_field_access.out | 535 +++++++++++++++++++++++ regress/sql/direct_field_access.sql | 319 ++++++++++++++ src/backend/utils/adt/agtype.c | 136 +++++- src/backend/utils/adt/agtype_util.c | 237 +++++++++- src/include/utils/agtype.h | 103 +++++ 6 files changed, 1304 insertions(+), 29 deletions(-) create mode 100644 regress/expected/direct_field_access.out create mode 100644 regress/sql/direct_field_access.sql diff --git a/Makefile b/Makefile index 17f2ed653..ffad7d6af 100644 --- a/Makefile +++ b/Makefile @@ -112,7 +112,8 @@ REGRESS = scan \ name_validation \ jsonb_operators \ list_comprehension \ - map_projection + map_projection \ + direct_field_access ifneq ($(EXTRA_TESTS),) REGRESS += $(EXTRA_TESTS) diff --git a/regress/expected/direct_field_access.out b/regress/expected/direct_field_access.out new file mode 100644 index 000000000..0a059cdd9 --- /dev/null +++ b/regress/expected/direct_field_access.out @@ -0,0 +1,535 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Direct Field Access Optimizations Test + * + * Tests for optimizations that directly access agtype fields without + * using the full iterator machinery or binary search: + * + * 1. fill_agtype_value_no_copy() - Read-only access without memory allocation + * 2. compare_agtype_scalar_containers() - Fast path for scalar comparisons + * 3. Direct pairs[0] access for vertex/edge id comparison + * 4. Fast path in get_one_agtype_from_variadic_args() + */ +LOAD 'age'; +SET search_path TO ag_catalog; +SELECT create_graph('direct_access'); +NOTICE: graph "direct_access" has been created + create_graph +-------------- + +(1 row) + +-- +-- Section 1: Scalar Comparison Fast Path Tests +-- +-- These tests exercise the compare_agtype_scalar_containers() fast path +-- which uses fill_agtype_value_no_copy() for read-only comparisons. +-- +-- Integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 2, 2 > 1, 1 = 1, 1 <> 2 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +SELECT * FROM cypher('direct_access', $$ + RETURN 100 < 50, 100 > 50, 100 = 100, 100 <> 100 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +-------+------+------+------- + false | true | true | false +(1 row) + +-- Float comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.5 < 2.5, 2.5 > 1.5, 1.5 = 1.5, 1.5 <> 2.5 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +-- String comparisons (tests no-copy string pointer) +SELECT * FROM cypher('direct_access', $$ + RETURN 'abc' < 'abd', 'abd' > 'abc', 'abc' = 'abc', 'abc' <> 'abd' +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +SELECT * FROM cypher('direct_access', $$ + RETURN 'hello world' < 'hello worlds', 'test' > 'TEST' +$$) AS (lt agtype, gt agtype); + lt | gt +------+------ + true | true +(1 row) + +-- Boolean comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN false < true, true > false, true = true, false <> true +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +-- Null comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN null = null, null <> null +$$) AS (eq agtype, ne agtype); + eq | ne +----+---- + | +(1 row) + +-- Mixed numeric type comparisons (integer vs float) +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 1.5, 2.0 > 1, 1.0 = 1 +$$) AS (lt agtype, gt agtype, eq agtype); + lt | gt | eq +------+------+------ + true | true | true +(1 row) + +-- Numeric type comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.234::numeric < 1.235::numeric, + 1.235::numeric > 1.234::numeric, + 1.234::numeric = 1.234::numeric +$$) AS (lt agtype, gt agtype, eq agtype); + lt | gt | eq +------+------+------ + true | true | true +(1 row) + +-- +-- Section 2: ORDER BY Tests (exercises comparison fast path) +-- +-- ORDER BY uses compare_agtype_containers_orderability which now has +-- a fast path for scalar comparisons. +-- +-- Integer ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n +$$) AS (n agtype); + n +--- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n DESC +$$) AS (n agtype); + n +--- + 9 + 8 + 7 + 6 + 5 + 4 + 3 + 2 + 1 +(9 rows) + +-- String ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND ['banana', 'apple', 'cherry', 'date'] AS s + RETURN s ORDER BY s +$$) AS (s agtype); + s +---------- + "apple" + "banana" + "cherry" + "date" +(4 rows) + +-- Float ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [3.14, 2.71, 1.41, 1.73] AS f + RETURN f ORDER BY f +$$) AS (f agtype); + f +------ + 1.41 + 1.73 + 2.71 + 3.14 +(4 rows) + +-- Boolean ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [true, false, true, false] AS b + RETURN b ORDER BY b +$$) AS (b agtype); + b +------- + false + false + true + true +(4 rows) + +-- +-- Section 3: Vertex/Edge Direct ID Access Tests +-- +-- These tests exercise the direct pairs[0] access optimization for +-- extracting graphid from vertices and edges during comparison. +-- +-- Create test data +SELECT * FROM cypher('direct_access', $$ + CREATE (a:Person {name: 'Alice', age: 30}), + (b:Person {name: 'Bob', age: 25}), + (c:Person {name: 'Charlie', age: 35}), + (d:Person {name: 'Diana', age: 28}), + (e:Person {name: 'Eve', age: 32}), + (a)-[:KNOWS {since: 2020}]->(b), + (b)-[:KNOWS {since: 2019}]->(c), + (c)-[:KNOWS {since: 2021}]->(d), + (d)-[:KNOWS {since: 2018}]->(e), + (e)-[:KNOWS {since: 2022}]->(a) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- Test max() on vertices (uses compare_agtype_scalar_values with AGTV_VERTEX) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN max(p) +$$) AS (max_vertex agtype); + max_vertex +---------------------------------------------------------------------------------------------- + {"id": 844424930131973, "label": "Person", "properties": {"age": 32, "name": "Eve"}}::vertex +(1 row) + +-- Test min() on vertices +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN min(p) +$$) AS (min_vertex agtype); + min_vertex +------------------------------------------------------------------------------------------------ + {"id": 844424930131969, "label": "Person", "properties": {"age": 30, "name": "Alice"}}::vertex +(1 row) + +-- Test max() on edges (uses compare_agtype_scalar_values with AGTV_EDGE) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN max(r) +$$) AS (max_edge agtype); + max_edge +----------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842629, "label": "KNOWS", "end_id": 844424930131969, "start_id": 844424930131973, "properties": {"since": 2022}}::edge +(1 row) + +-- Test min() on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN min(r) +$$) AS (min_edge agtype); + min_edge +----------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {"since": 2020}}::edge +(1 row) + +-- ORDER BY on vertices (uses direct id comparison) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Diana" + "Eve" +(5 rows) + +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p DESC +$$) AS (name agtype); + name +----------- + "Eve" + "Diana" + "Charlie" + "Bob" + "Alice" +(5 rows) + +-- ORDER BY on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN r.since ORDER BY r +$$) AS (since agtype); + since +------- + 2020 + 2019 + 2021 + 2018 + 2022 +(5 rows) + +-- Vertex comparison in WHERE +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person), (b:Person) + WHERE a < b + RETURN a.name, b.name +$$) AS (a_name agtype, b_name agtype); + a_name | b_name +-----------+----------- + "Alice" | "Bob" + "Alice" | "Charlie" + "Alice" | "Diana" + "Alice" | "Eve" + "Bob" | "Charlie" + "Bob" | "Diana" + "Bob" | "Eve" + "Charlie" | "Diana" + "Charlie" | "Eve" + "Diana" | "Eve" +(10 rows) + +-- +-- Section 4: Fast Path for get_one_agtype_from_variadic_args +-- +-- These tests exercise the fast path that bypasses extract_variadic_args +-- when the argument is already agtype. +-- +-- Direct agtype comparison operators (use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 42 = 42, 42 <> 43, 42 < 100, 42 > 10 +$$) AS (eq agtype, ne agtype, lt agtype, gt agtype); + eq | ne | lt | gt +------+------+------+------ + true | true | true | true +(1 row) + +-- Arithmetic operators (also use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 10 + 5, 10 - 5, 10 * 5, 10 / 5 +$$) AS (add agtype, sub agtype, mul agtype, div agtype); + add | sub | mul | div +-----+-----+-----+----- + 15 | 5 | 50 | 2 +(1 row) + +-- String functions that take agtype args +SELECT * FROM cypher('direct_access', $$ + RETURN toUpper('hello'), toLower('WORLD'), size('test') +$$) AS (upper agtype, lower agtype, sz agtype); + upper | lower | sz +---------+---------+---- + "HELLO" | "world" | 4 +(1 row) + +-- Type checking functions +SELECT * FROM cypher('direct_access', $$ + RETURN toInteger('42'), toFloat('3.14'), toString(42) +$$) AS (int_val agtype, float_val agtype, str_val agtype); + int_val | float_val | str_val +---------+-----------+--------- + 42 | 3.14 | "42" +(1 row) + +-- +-- Section 5: Direct Field Access for Accessor Functions +-- +-- These tests exercise the direct field access macros in id(), start_id(), +-- end_id(), label(), and properties() functions. +-- +-- Test id() on vertices (uses AGTYPE_VERTEX_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN id(p) +$$) AS (vertex_id agtype); + vertex_id +----------------- + 844424930131969 +(1 row) + +-- Test id() on edges (uses AGTYPE_EDGE_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN id(r) +$$) AS (edge_id agtype); + edge_id +------------------ + 1125899906842625 +(1 row) + +-- Test start_id() on edges (uses AGTYPE_EDGE_GET_START_ID macro - index 3) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN start_id(r), id(a) +$$) AS (start_id agtype, alice_id agtype); + start_id | alice_id +-----------------+----------------- + 844424930131969 | 844424930131969 +(1 row) + +-- Test end_id() on edges (uses AGTYPE_EDGE_GET_END_ID macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN end_id(r), id(b) +$$) AS (end_id agtype, bob_id agtype); + end_id | bob_id +-----------------+----------------- + 844424930131970 | 844424930131970 +(1 row) + +-- Test label() on vertices (uses AGTYPE_VERTEX_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN label(p) +$$) AS (vertex_label agtype); + vertex_label +-------------- + "Person" +(1 row) + +-- Test label() on edges (uses AGTYPE_EDGE_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN DISTINCT label(r) +$$) AS (edge_label agtype); + edge_label +------------ + "KNOWS" +(1 row) + +-- Test properties() on vertices (uses AGTYPE_VERTEX_GET_PROPERTIES macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN properties(p) +$$) AS (vertex_props agtype); + vertex_props +------------------------------ + {"age": 30, "name": "Alice"} +(1 row) + +-- Test properties() on edges (uses AGTYPE_EDGE_GET_PROPERTIES macro - index 4) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN properties(r) +$$) AS (edge_props agtype); + edge_props +----------------- + {"since": 2020} +(1 row) + +-- Combined accessor test - verify all fields are accessible +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person) + RETURN id(a), label(a), properties(a).name, + id(r), start_id(r), end_id(r), label(r), properties(r).since, + id(b), label(b), properties(b).name +$$) AS (a_id agtype, a_label agtype, a_name agtype, + r_id agtype, r_start agtype, r_end agtype, r_label agtype, r_since agtype, + b_id agtype, b_label agtype, b_name agtype); + a_id | a_label | a_name | r_id | r_start | r_end | r_label | r_since | b_id | b_label | b_name +-----------------+----------+---------+------------------+-----------------+-----------------+---------+---------+-----------------+----------+-------- + 844424930131969 | "Person" | "Alice" | 1125899906842625 | 844424930131969 | 844424930131970 | "KNOWS" | 2020 | 844424930131970 | "Person" | "Bob" +(1 row) + +-- +-- Section 6: Mixed Comparisons and Edge Cases +-- +-- Array comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN [1,2,3] = [1,2,3], [1,2,3] < [1,2,4] +$$) AS (eq agtype, lt agtype); + eq | lt +------+------ + true | true +(1 row) + +-- Object comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN {a:1, b:2} = {a:1, b:2} +$$) AS (eq agtype); + eq +------ + true +(1 row) + +-- Large integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 9223372036854775807 > 9223372036854775806, + -9223372036854775808 < -9223372036854775807 +$$) AS (big_gt agtype, neg_lt agtype); + big_gt | neg_lt +--------+-------- + true | true +(1 row) + +-- Empty string comparison +SELECT * FROM cypher('direct_access', $$ + RETURN '' < 'a', '' = '' +$$) AS (lt agtype, eq agtype); + lt | eq +------+------ + true | true +(1 row) + +-- Special float values +SELECT * FROM cypher('direct_access', $$ + RETURN 0.0 = -0.0 +$$) AS (zero_eq agtype); + zero_eq +--------- + true +(1 row) + +-- +-- Cleanup +-- +SELECT drop_graph('direct_access', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table direct_access._ag_label_vertex +drop cascades to table direct_access._ag_label_edge +drop cascades to table direct_access."Person" +drop cascades to table direct_access."KNOWS" +NOTICE: graph "direct_access" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/direct_field_access.sql b/regress/sql/direct_field_access.sql new file mode 100644 index 000000000..c8060be4a --- /dev/null +++ b/regress/sql/direct_field_access.sql @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Direct Field Access Optimizations Test + * + * Tests for optimizations that directly access agtype fields without + * using the full iterator machinery or binary search: + * + * 1. fill_agtype_value_no_copy() - Read-only access without memory allocation + * 2. compare_agtype_scalar_containers() - Fast path for scalar comparisons + * 3. Direct pairs[0] access for vertex/edge id comparison + * 4. Fast path in get_one_agtype_from_variadic_args() + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +SELECT create_graph('direct_access'); + +-- +-- Section 1: Scalar Comparison Fast Path Tests +-- +-- These tests exercise the compare_agtype_scalar_containers() fast path +-- which uses fill_agtype_value_no_copy() for read-only comparisons. +-- + +-- Integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 2, 2 > 1, 1 = 1, 1 <> 2 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +SELECT * FROM cypher('direct_access', $$ + RETURN 100 < 50, 100 > 50, 100 = 100, 100 <> 100 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +-- Float comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.5 < 2.5, 2.5 > 1.5, 1.5 = 1.5, 1.5 <> 2.5 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +-- String comparisons (tests no-copy string pointer) +SELECT * FROM cypher('direct_access', $$ + RETURN 'abc' < 'abd', 'abd' > 'abc', 'abc' = 'abc', 'abc' <> 'abd' +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +SELECT * FROM cypher('direct_access', $$ + RETURN 'hello world' < 'hello worlds', 'test' > 'TEST' +$$) AS (lt agtype, gt agtype); + +-- Boolean comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN false < true, true > false, true = true, false <> true +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +-- Null comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN null = null, null <> null +$$) AS (eq agtype, ne agtype); + +-- Mixed numeric type comparisons (integer vs float) +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 1.5, 2.0 > 1, 1.0 = 1 +$$) AS (lt agtype, gt agtype, eq agtype); + +-- Numeric type comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.234::numeric < 1.235::numeric, + 1.235::numeric > 1.234::numeric, + 1.234::numeric = 1.234::numeric +$$) AS (lt agtype, gt agtype, eq agtype); + +-- +-- Section 2: ORDER BY Tests (exercises comparison fast path) +-- +-- ORDER BY uses compare_agtype_containers_orderability which now has +-- a fast path for scalar comparisons. +-- + +-- Integer ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n +$$) AS (n agtype); + +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n DESC +$$) AS (n agtype); + +-- String ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND ['banana', 'apple', 'cherry', 'date'] AS s + RETURN s ORDER BY s +$$) AS (s agtype); + +-- Float ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [3.14, 2.71, 1.41, 1.73] AS f + RETURN f ORDER BY f +$$) AS (f agtype); + +-- Boolean ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [true, false, true, false] AS b + RETURN b ORDER BY b +$$) AS (b agtype); + +-- +-- Section 3: Vertex/Edge Direct ID Access Tests +-- +-- These tests exercise the direct pairs[0] access optimization for +-- extracting graphid from vertices and edges during comparison. +-- + +-- Create test data +SELECT * FROM cypher('direct_access', $$ + CREATE (a:Person {name: 'Alice', age: 30}), + (b:Person {name: 'Bob', age: 25}), + (c:Person {name: 'Charlie', age: 35}), + (d:Person {name: 'Diana', age: 28}), + (e:Person {name: 'Eve', age: 32}), + (a)-[:KNOWS {since: 2020}]->(b), + (b)-[:KNOWS {since: 2019}]->(c), + (c)-[:KNOWS {since: 2021}]->(d), + (d)-[:KNOWS {since: 2018}]->(e), + (e)-[:KNOWS {since: 2022}]->(a) +$$) AS (result agtype); + +-- Test max() on vertices (uses compare_agtype_scalar_values with AGTV_VERTEX) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN max(p) +$$) AS (max_vertex agtype); + +-- Test min() on vertices +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN min(p) +$$) AS (min_vertex agtype); + +-- Test max() on edges (uses compare_agtype_scalar_values with AGTV_EDGE) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN max(r) +$$) AS (max_edge agtype); + +-- Test min() on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN min(r) +$$) AS (min_edge agtype); + +-- ORDER BY on vertices (uses direct id comparison) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p +$$) AS (name agtype); + +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p DESC +$$) AS (name agtype); + +-- ORDER BY on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN r.since ORDER BY r +$$) AS (since agtype); + +-- Vertex comparison in WHERE +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person), (b:Person) + WHERE a < b + RETURN a.name, b.name +$$) AS (a_name agtype, b_name agtype); + +-- +-- Section 4: Fast Path for get_one_agtype_from_variadic_args +-- +-- These tests exercise the fast path that bypasses extract_variadic_args +-- when the argument is already agtype. +-- + +-- Direct agtype comparison operators (use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 42 = 42, 42 <> 43, 42 < 100, 42 > 10 +$$) AS (eq agtype, ne agtype, lt agtype, gt agtype); + +-- Arithmetic operators (also use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 10 + 5, 10 - 5, 10 * 5, 10 / 5 +$$) AS (add agtype, sub agtype, mul agtype, div agtype); + +-- String functions that take agtype args +SELECT * FROM cypher('direct_access', $$ + RETURN toUpper('hello'), toLower('WORLD'), size('test') +$$) AS (upper agtype, lower agtype, sz agtype); + +-- Type checking functions +SELECT * FROM cypher('direct_access', $$ + RETURN toInteger('42'), toFloat('3.14'), toString(42) +$$) AS (int_val agtype, float_val agtype, str_val agtype); + +-- +-- Section 5: Direct Field Access for Accessor Functions +-- +-- These tests exercise the direct field access macros in id(), start_id(), +-- end_id(), label(), and properties() functions. +-- + +-- Test id() on vertices (uses AGTYPE_VERTEX_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN id(p) +$$) AS (vertex_id agtype); + +-- Test id() on edges (uses AGTYPE_EDGE_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN id(r) +$$) AS (edge_id agtype); + +-- Test start_id() on edges (uses AGTYPE_EDGE_GET_START_ID macro - index 3) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN start_id(r), id(a) +$$) AS (start_id agtype, alice_id agtype); + +-- Test end_id() on edges (uses AGTYPE_EDGE_GET_END_ID macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN end_id(r), id(b) +$$) AS (end_id agtype, bob_id agtype); + +-- Test label() on vertices (uses AGTYPE_VERTEX_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN label(p) +$$) AS (vertex_label agtype); + +-- Test label() on edges (uses AGTYPE_EDGE_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN DISTINCT label(r) +$$) AS (edge_label agtype); + +-- Test properties() on vertices (uses AGTYPE_VERTEX_GET_PROPERTIES macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN properties(p) +$$) AS (vertex_props agtype); + +-- Test properties() on edges (uses AGTYPE_EDGE_GET_PROPERTIES macro - index 4) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN properties(r) +$$) AS (edge_props agtype); + +-- Combined accessor test - verify all fields are accessible +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person) + RETURN id(a), label(a), properties(a).name, + id(r), start_id(r), end_id(r), label(r), properties(r).since, + id(b), label(b), properties(b).name +$$) AS (a_id agtype, a_label agtype, a_name agtype, + r_id agtype, r_start agtype, r_end agtype, r_label agtype, r_since agtype, + b_id agtype, b_label agtype, b_name agtype); + +-- +-- Section 6: Mixed Comparisons and Edge Cases +-- + +-- Array comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN [1,2,3] = [1,2,3], [1,2,3] < [1,2,4] +$$) AS (eq agtype, lt agtype); + +-- Object comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN {a:1, b:2} = {a:1, b:2} +$$) AS (eq agtype); + +-- Large integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 9223372036854775807 > 9223372036854775806, + -9223372036854775808 < -9223372036854775807 +$$) AS (big_gt agtype, neg_lt agtype); + +-- Empty string comparison +SELECT * FROM cypher('direct_access', $$ + RETURN '' < 'a', '' = '' +$$) AS (lt agtype, eq agtype); + +-- Special float values +SELECT * FROM cypher('direct_access', $$ + RETURN 0.0 = -0.0 +$$) AS (zero_eq agtype); + +-- +-- Cleanup +-- +SELECT drop_graph('direct_access', true); diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index a38ac857f..44d82997f 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -5404,10 +5404,24 @@ Datum age_id(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("id() argument must be a vertex, an edge or null"))); - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "id"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_INTEGER); + /* + * Direct field access optimization: id is at a fixed index for both + * vertex and edge objects due to key length sorting. + */ + if (agtv_object->type == AGTV_VERTEX) + { + agtv_result = AGTYPE_VERTEX_GET_ID(agtv_object); + } + else if (agtv_object->type == AGTV_EDGE) + { + agtv_result = AGTYPE_EDGE_GET_ID(agtv_object); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("id() unexpected argument type"))); + } PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -5442,10 +5456,11 @@ Datum age_start_id(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("start_id() argument must be an edge or null"))); - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "start_id"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_INTEGER); + /* + * Direct field access optimization: start_id is at index 3 for edge + * objects due to key length sorting (id=0, label=1, end_id=2, start_id=3). + */ + agtv_result = AGTYPE_EDGE_GET_START_ID(agtv_object); PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -5480,10 +5495,11 @@ Datum age_end_id(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("end_id() argument must be an edge or null"))); - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "end_id"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_INTEGER); + /* + * Direct field access optimization: end_id is at index 2 for edge + * objects due to key length sorting (id=0, label=1, end_id=2). + */ + agtv_result = AGTYPE_EDGE_GET_END_ID(agtv_object); PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -6037,10 +6053,25 @@ Datum age_properties(PG_FUNCTION_ARGS) errmsg("properties() argument must be a vertex, an edge or null"))); } - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "properties"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_OBJECT); + /* + * Direct field access optimization: properties is at index 2 for vertex + * (id=0, label=1, properties=2) and index 4 for edge (id=0, label=1, + * end_id=2, start_id=3, properties=4) due to key length sorting. + */ + if (agtv_object->type == AGTV_VERTEX) + { + agtv_result = AGTYPE_VERTEX_GET_PROPERTIES(agtv_object); + } + else if (agtv_object->type == AGTV_EDGE) + { + agtv_result = AGTYPE_EDGE_GET_PROPERTIES(agtv_object); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("properties() unexpected argument type"))); + } PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -7175,8 +7206,24 @@ Datum age_label(PG_FUNCTION_ARGS) } - /* extract the label agtype value from the vertex or edge */ - label = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_value, "label"); + /* + * Direct field access optimization: label is at a fixed index for both + * vertex and edge objects due to key length sorting. + */ + if (agtv_value->type == AGTV_VERTEX) + { + label = AGTYPE_VERTEX_GET_LABEL(agtv_value); + } + else if (agtv_value->type == AGTV_EDGE) + { + label = AGTYPE_EDGE_GET_LABEL(agtv_value); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("label() unexpected argument type"))); + } PG_RETURN_POINTER(agtype_value_to_agtype(label)); } @@ -10512,6 +10559,59 @@ agtype *get_one_agtype_from_variadic_args(FunctionCallInfo fcinfo, Oid *types = NULL; agtype *agtype_result = NULL; + /* + * Fast path optimization: For non-variadic calls where the argument + * is already an agtype, we can avoid the overhead of extract_variadic_args + * which allocates three arrays. This is the common case for most agtype + * comparison and arithmetic operators. + */ + if (!get_fn_expr_variadic(fcinfo->flinfo)) + { + int total_args = PG_NARGS(); + int actual_nargs = total_args - variadic_offset; + + /* Verify expected number of arguments */ + if (actual_nargs != expected_nargs) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("number of args %d does not match expected %d", + actual_nargs, expected_nargs))); + } + + /* Check for SQL NULL */ + if (PG_ARGISNULL(variadic_offset)) + { + return NULL; + } + + /* Check if the argument is already an agtype */ + if (get_fn_expr_argtype(fcinfo->flinfo, variadic_offset) == AGTYPEOID) + { + agtype_container *agtc; + + agtype_result = DATUM_GET_AGTYPE_P(PG_GETARG_DATUM(variadic_offset)); + agtc = &agtype_result->root; + + /* + * Is this a scalar (scalars are stored as one element arrays)? + * If so, test for agtype NULL. + */ + if (AGTYPE_CONTAINER_IS_SCALAR(agtc) && + AGTE_IS_NULL(agtc->children[0])) + { + return NULL; + } + + return agtype_result; + } + + /* + * Not an agtype, need to convert. Fall through to use + * extract_variadic_args for type conversion handling. + */ + } + + /* Standard path using extract_variadic_args */ nargs = extract_variadic_args(fcinfo, variadic_offset, false, &args, &types, &nulls); /* throw an error if the number of args is not the expected number */ diff --git a/src/backend/utils/adt/agtype_util.c b/src/backend/utils/adt/agtype_util.c index 01a965cdd..b39723413 100644 --- a/src/backend/utils/adt/agtype_util.c +++ b/src/backend/utils/adt/agtype_util.c @@ -41,6 +41,14 @@ #include "utils/agtype_ext.h" +/* + * Extended type header macros - must match definitions in agtype_ext.c. + * These are used for deserializing extended agtype values (INTEGER, FLOAT, + * VERTEX, EDGE, PATH) from their binary representation. + */ +#define AGT_HEADER_TYPE uint32 +#define AGT_HEADER_SIZE sizeof(AGT_HEADER_TYPE) + /* * Maximum number of elements in an array (or key/value pairs in an object). * This is limited by two things: the size of the agtentry array must fit @@ -56,6 +64,11 @@ static void fill_agtype_value(agtype_container *container, int index, char *base_addr, uint32 offset, agtype_value *result); +static void fill_agtype_value_no_copy(agtype_container *container, int index, + char *base_addr, uint32 offset, + agtype_value *result); +static int compare_agtype_scalar_containers(agtype_container *a, + agtype_container *b); static bool equals_agtype_scalar_value(agtype_value *a, agtype_value *b); static agtype *convert_to_agtype(agtype_value *val); static void convert_agtype_value(StringInfo buffer, agtentry *header, @@ -264,6 +277,24 @@ int compare_agtype_containers_orderability(agtype_container *a, agtype_iterator *itb; int res = 0; + /* + * Fast path optimization for scalar values. + * + * The most common case in ORDER BY and comparison operations is comparing + * scalar values (integers, strings, floats, etc.). For these cases, we can + * avoid the overhead of the full iterator machinery by directly extracting + * and comparing the scalar values. + * + * This provides significant performance improvement because: + * 1. We avoid allocating two agtype_iterator structures + * 2. We avoid the iterator state machine overhead + * 3. We use no-copy extraction where possible + */ + if (AGTYPE_CONTAINER_IS_SCALAR(a) && AGTYPE_CONTAINER_IS_SCALAR(b)) + { + return compare_agtype_scalar_containers(a, b); + } + ita = agtype_iterator_init(a); itb = agtype_iterator_init(b); @@ -751,6 +782,173 @@ static void fill_agtype_value(agtype_container *container, int index, } } +/* + * A helper function to fill in an agtype_value WITHOUT making deep copies. + * This is used for read-only comparison operations where the agtype_value + * will not outlive the container data. The caller MUST NOT free the + * agtype_value content or use it after the container is freed. + * + * This function provides significant performance improvements for comparison + * operations by avoiding palloc/memcpy for strings and numerics. + * + * Note: For AGTV_STRING, val.string.val points directly into container data. + * Note: For AGTV_NUMERIC, val.numeric points directly into container data. + * Note: Extended types (VERTEX, EDGE, PATH) still require deserialization, + * so they use the standard fill_agtype_value path. + */ +static void fill_agtype_value_no_copy(agtype_container *container, int index, + char *base_addr, uint32 offset, + agtype_value *result) +{ + agtentry entry = container->children[index]; + + if (AGTE_IS_NULL(entry)) + { + result->type = AGTV_NULL; + } + else if (AGTE_IS_STRING(entry)) + { + result->type = AGTV_STRING; + /* Point directly into the container data - no copy */ + result->val.string.val = base_addr + offset; + result->val.string.len = get_agtype_length(container, index); + } + else if (AGTE_IS_NUMERIC(entry)) + { + result->type = AGTV_NUMERIC; + /* Point directly into the container data - no copy */ + result->val.numeric = (Numeric)(base_addr + INTALIGN(offset)); + } + else if (AGTE_IS_AGTYPE(entry)) + { + /* + * For extended types (INTEGER, FLOAT, VERTEX, EDGE, PATH), we need + * to deserialize. INTEGER and FLOAT don't allocate, but composite + * types (VERTEX, EDGE, PATH) do. For simple scalar comparisons, + * we handle INTEGER and FLOAT directly here. + */ + char *base = base_addr + INTALIGN(offset); + AGT_HEADER_TYPE agt_header = *((AGT_HEADER_TYPE *)base); + + switch (agt_header) + { + case AGT_HEADER_INTEGER: + result->type = AGTV_INTEGER; + result->val.int_value = *((int64 *)(base + AGT_HEADER_SIZE)); + break; + + case AGT_HEADER_FLOAT: + result->type = AGTV_FLOAT; + result->val.float_value = *((float8 *)(base + AGT_HEADER_SIZE)); + break; + + default: + /* + * For VERTEX, EDGE, PATH - use standard deserialization. + * These are composite types that require full parsing. + */ + ag_deserialize_extended_type(base_addr, offset, result); + break; + } + } + else if (AGTE_IS_BOOL_TRUE(entry)) + { + result->type = AGTV_BOOL; + result->val.boolean = true; + } + else if (AGTE_IS_BOOL_FALSE(entry)) + { + result->type = AGTV_BOOL; + result->val.boolean = false; + } + else + { + Assert(AGTE_IS_CONTAINER(entry)); + result->type = AGTV_BINARY; + /* Remove alignment padding from data pointer and length */ + result->val.binary.data = + (agtype_container *)(base_addr + INTALIGN(offset)); + result->val.binary.len = get_agtype_length(container, index) - + (INTALIGN(offset) - offset); + } +} + +/* + * Fast path comparison for scalar agtype containers. + * + * This function compares two scalar containers directly without the overhead + * of the full iterator machinery. It extracts the scalar values using no-copy + * fill and compares them directly. + * + * Returns: negative if a < b, 0 if a == b, positive if a > b + */ +static int compare_agtype_scalar_containers(agtype_container *a, + agtype_container *b) +{ + agtype_value va; + agtype_value vb; + char *base_addr_a; + char *base_addr_b; + int result; + bool need_free_a = false; + bool need_free_b = false; + + Assert(AGTYPE_CONTAINER_IS_SCALAR(a)); + Assert(AGTYPE_CONTAINER_IS_SCALAR(b)); + + /* Scalars are stored as single-element arrays */ + base_addr_a = (char *)&a->children[1]; + base_addr_b = (char *)&b->children[1]; + + /* Use no-copy fill to avoid allocations for simple types */ + fill_agtype_value_no_copy(a, 0, base_addr_a, 0, &va); + fill_agtype_value_no_copy(b, 0, base_addr_b, 0, &vb); + + /* + * Check if we need to free the values after comparison. + * Only VERTEX, EDGE, and PATH types allocate memory in no-copy mode. + */ + if (va.type == AGTV_VERTEX || va.type == AGTV_EDGE || va.type == AGTV_PATH) + { + need_free_a = true; + } + if (vb.type == AGTV_VERTEX || vb.type == AGTV_EDGE || vb.type == AGTV_PATH) + { + need_free_b = true; + } + + /* + * Compare the scalar values. If types match or are numeric compatible, + * use scalar comparison. Otherwise, use type-based ordering. + */ + if ((va.type == vb.type) || + ((va.type == AGTV_INTEGER || va.type == AGTV_FLOAT || + va.type == AGTV_NUMERIC) && + (vb.type == AGTV_INTEGER || vb.type == AGTV_FLOAT || + vb.type == AGTV_NUMERIC))) + { + result = compare_agtype_scalar_values(&va, &vb); + } + else + { + /* Type-defined order */ + result = (get_type_sort_priority(va.type) < + get_type_sort_priority(vb.type)) ? -1 : 1; + } + + /* Free any allocated memory from composite types */ + if (need_free_a) + { + pfree_agtype_value_content(&va); + } + if (need_free_b) + { + pfree_agtype_value_content(&vb); + } + + return result; +} + /* * Push agtype_value into agtype_parse_state. * @@ -1597,7 +1795,8 @@ void agtype_hash_scalar_value_extended(const agtype_value *scalar_val, case AGTV_VERTEX: { graphid id; - agtype_value *id_agt = GET_AGTYPE_VALUE_OBJECT_VALUE(scalar_val, "id"); + agtype_value *id_agt; + id_agt = AGTYPE_VERTEX_GET_ID(scalar_val); id = id_agt->val.int_value; tmp = DatumGetUInt64(DirectFunctionCall2( hashint8extended, Float8GetDatum(id), UInt64GetDatum(seed))); @@ -1606,7 +1805,8 @@ void agtype_hash_scalar_value_extended(const agtype_value *scalar_val, case AGTV_EDGE: { graphid id; - agtype_value *id_agt = GET_AGTYPE_VALUE_OBJECT_VALUE(scalar_val, "id"); + agtype_value *id_agt; + id_agt = AGTYPE_EDGE_GET_ID(scalar_val); id = id_agt->val.int_value; tmp = DatumGetUInt64(DirectFunctionCall2( hashint8extended, Float8GetDatum(id), UInt64GetDatum(seed))); @@ -1704,8 +1904,8 @@ static bool equals_agtype_scalar_value(agtype_value *a, agtype_value *b) case AGTV_VERTEX: { graphid a_graphid, b_graphid; - a_graphid = a->val.object.pairs[0].value.val.int_value; - b_graphid = b->val.object.pairs[0].value.val.int_value; + a_graphid = AGTYPE_VERTEX_GET_ID(a)->val.int_value; + b_graphid = AGTYPE_VERTEX_GET_ID(b)->val.int_value; return a_graphid == b_graphid; } @@ -1790,16 +1990,33 @@ int compare_agtype_scalar_values(agtype_value *a, agtype_value *b) return compare_two_floats_orderability(a->val.float_value, b->val.float_value); case AGTV_VERTEX: - case AGTV_EDGE: { - agtype_value *a_id, *b_id; graphid a_graphid, b_graphid; - a_id = GET_AGTYPE_VALUE_OBJECT_VALUE(a, "id"); - b_id = GET_AGTYPE_VALUE_OBJECT_VALUE(b, "id"); + /* Direct field access optimization using macros defined in agtype.h. */ + a_graphid = AGTYPE_VERTEX_GET_ID(a)->val.int_value; + b_graphid = AGTYPE_VERTEX_GET_ID(b)->val.int_value; + + if (a_graphid == b_graphid) + { + return 0; + } + else if (a_graphid > b_graphid) + { + return 1; + } + else + { + return -1; + } + } + case AGTV_EDGE: + { + graphid a_graphid, b_graphid; - a_graphid = a_id->val.int_value; - b_graphid = b_id->val.int_value; + /* Direct field access optimization using macros defined in agtype.h. */ + a_graphid = AGTYPE_EDGE_GET_ID(a)->val.int_value; + b_graphid = AGTYPE_EDGE_GET_ID(b)->val.int_value; if (a_graphid == b_graphid) { diff --git a/src/include/utils/agtype.h b/src/include/utils/agtype.h index 486775320..1f6908103 100644 --- a/src/include/utils/agtype.h +++ b/src/include/utils/agtype.h @@ -322,6 +322,109 @@ enum agtype_value_type AGTV_BINARY }; +/* + * Direct field access indices for vertex and edge objects. + * + * Vertex and edge objects are serialized with keys sorted by length first, + * then lexicographically (via uniqueify_agtype_object). This means field + * positions are deterministic and can be accessed directly without binary + * search, providing O(1) access instead of O(log n). + * + * Vertex keys by length: "id"(2), "label"(5), "properties"(10) + * Edge keys by length: "id"(2), "label"(5), "end_id"(6), "start_id"(8), "properties"(10) + */ +#define VERTEX_FIELD_ID 0 +#define VERTEX_FIELD_LABEL 1 +#define VERTEX_FIELD_PROPERTIES 2 +#define VERTEX_NUM_FIELDS 3 + +#define EDGE_FIELD_ID 0 +#define EDGE_FIELD_LABEL 1 +#define EDGE_FIELD_END_ID 2 +#define EDGE_FIELD_START_ID 3 +#define EDGE_FIELD_PROPERTIES 4 +#define EDGE_NUM_FIELDS 5 + +/* + * Macros for direct field access from vertex/edge agtype_value objects. + * These avoid the binary search overhead of GET_AGTYPE_VALUE_OBJECT_VALUE. + * Validation is integrated - macros will error if field count is incorrect. + * Uses GCC statement expressions to allow validation within expressions. + */ +#define AGTYPE_VERTEX_GET_ID(v) \ + ({ \ + if ((v)->val.object.num_pairs != VERTEX_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid vertex structure: expected %d fields, found %d", \ + VERTEX_NUM_FIELDS, (v)->val.object.num_pairs))); \ + &(v)->val.object.pairs[VERTEX_FIELD_ID].value; \ + }) +#define AGTYPE_VERTEX_GET_LABEL(v) \ + ({ \ + if ((v)->val.object.num_pairs != VERTEX_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid vertex structure: expected %d fields, found %d", \ + VERTEX_NUM_FIELDS, (v)->val.object.num_pairs))); \ + &(v)->val.object.pairs[VERTEX_FIELD_LABEL].value; \ + }) +#define AGTYPE_VERTEX_GET_PROPERTIES(v) \ + ({ \ + if ((v)->val.object.num_pairs != VERTEX_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid vertex structure: expected %d fields, found %d", \ + VERTEX_NUM_FIELDS, (v)->val.object.num_pairs))); \ + &(v)->val.object.pairs[VERTEX_FIELD_PROPERTIES].value; \ + }) + +#define AGTYPE_EDGE_GET_ID(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_ID].value; \ + }) +#define AGTYPE_EDGE_GET_LABEL(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_LABEL].value; \ + }) +#define AGTYPE_EDGE_GET_END_ID(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_END_ID].value; \ + }) +#define AGTYPE_EDGE_GET_START_ID(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_START_ID].value; \ + }) +#define AGTYPE_EDGE_GET_PROPERTIES(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_PROPERTIES].value; \ + }) + /* * agtype_value: In-memory representation of agtype. This is a convenient * deserialized representation, that can easily support using the "val" From 513043ca9951ac2df9c8967a6c738aba72bf3471 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sat, 17 Jan 2026 03:18:10 -0800 Subject: [PATCH 17/20] Upgrade Jest to v29 for node: protocol compatibility (#2307) Note: This PR was created with AI tools and a human. The pg-connection-string module (dependency of pg) now uses the node: protocol prefix for built-in modules (e.g., require('node:process')). Jest 26 does not support this syntax, causing test failures. Changes: - Upgrade jest from ^26.6.3 to ^29.7.0 - Upgrade ts-jest from ^26.5.1 to ^29.4.6 - Upgrade @types/jest from ^26.0.20 to ^29.5.14 - Update typescript to ^4.9.5 This also resolves 19 npm audit vulnerabilities (17 moderate, 2 high) that existed in the older Jest 26 dependency tree. modified: drivers/nodejs/package.json --- drivers/nodejs/package.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nodejs/package.json b/drivers/nodejs/package.json index 9f88bc2ba..6be11c780 100644 --- a/drivers/nodejs/package.json +++ b/drivers/nodejs/package.json @@ -33,7 +33,7 @@ "pg": ">=6.0.0" }, "devDependencies": { - "@types/jest": "^26.0.20", + "@types/jest": "^29.5.14", "@types/pg": "^7.14.10", "@typescript-eslint/eslint-plugin": "^4.22.1", "@typescript-eslint/parser": "^4.22.1", @@ -44,8 +44,8 @@ "eslint-plugin-jest": "^24.3.6", "eslint-plugin-node": "^11.1.0", "eslint-plugin-promise": "^4.3.1", - "jest": "^26.6.3", - "ts-jest": "^26.5.1", - "typescript": "^4.1.5" + "jest": "^29.7.0", + "ts-jest": "^29.4.6", + "typescript": "^4.9.5" } } From 994e7dd0326b0e3280f65a9fa5fc1f8710c3dce5 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sun, 18 Jan 2026 10:02:55 -0800 Subject: [PATCH 18/20] Fix Issue 1884: Ambiguous column reference (#2306) Fix Issue 1884: Ambiguous column reference and invalid AGT header errors. Note: This PR was created with AI tools and a human, or 2. This commit addresses two related bugs that occur when using SET to store graph elements (vertices, edges, paths) as property values: Issue 1884 - "column reference is ambiguous" error: When a Cypher query uses the same variable in both the SET expression RHS and the RETURN clause (e.g., SET n.prop = n RETURN n), PostgreSQL would report "column reference is ambiguous" because the variable appeared in multiple subqueries without proper qualification. Solution: The fix for this issue was already in place through the target entry naming scheme that qualifies column references. "Invalid AGT header value" offset error: When deserializing nested VERTEX, EDGE, or PATH values stored in properties, the system would fail with errors like "Invalid AGT header value: 0x00000041". This occurred because ag_serialize_extended_type() did not include alignment padding (padlen) in the agtentry length calculation for these types, while fill_agtype_value() uses INTALIGN() when reading, causing offset mismatch. Solution: Modified ag_serialize_extended_type() in agtype_ext.c to include padlen in the agtentry length for VERTEX, EDGE, and PATH cases, matching the existing pattern used for INTEGER, FLOAT, and NUMERIC types: *agtentry = AGTENTRY_IS_AGTYPE | (padlen + (AGTENTRY_OFFLENMASK & ...)); This ensures the serialized length accounts for alignment padding, allowing correct deserialization of nested graph elements. Appropriate regression tests were added to verify the fixes. Co-authored by: Zainab Saad <105385638+Zainab-Saad@users.noreply.github.com> modified: regress/expected/cypher_set.out modified: regress/sql/cypher_set.sql modified: src/backend/parser/cypher_clause.c modified: src/backend/utils/adt/agtype_ext.c --- regress/expected/cypher_set.out | 266 +++++++++++++++++++++++++++++ regress/sql/cypher_set.sql | 164 ++++++++++++++++++ src/backend/parser/cypher_clause.c | 19 ++- src/backend/utils/adt/agtype_ext.c | 8 +- 4 files changed, 451 insertions(+), 6 deletions(-) diff --git a/regress/expected/cypher_set.out b/regress/expected/cypher_set.out index 1d24a7f9b..239234ed6 100644 --- a/regress/expected/cypher_set.out +++ b/regress/expected/cypher_set.out @@ -988,6 +988,245 @@ SELECT * FROM cypher('issue_1634', $$ MATCH (u) DELETE (u) $$) AS (u agtype); --- (0 rows) +-- +-- Issue 1884: column reference is ambiguous when using same variable in +-- SET expression and RETURN clause +-- +-- These tests cover: +-- 1. "column reference is ambiguous" error when variable is used in both +-- SET expression RHS (e.g., SET n.prop = n) and RETURN clause +-- 2. "Invalid AGT header value" error caused by incorrect offset calculation +-- when nested VERTEX/EDGE/PATH values are serialized in properties +-- +-- Tests use isolated data to keep output manageable and avoid cumulative nesting +-- +SELECT * FROM create_graph('issue_1884'); +NOTICE: graph "issue_1884" has been created + create_graph +-------------- + +(1 row) + +-- ============================================================================ +-- Test Group A: Basic "column reference is ambiguous" fix (Issue 1884) +-- ============================================================================ +-- Test A1: Core issue - SET n.prop = n with RETURN n (the original bug) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestA1 {name: 'A1'}) + SET n.self = n + RETURN n +$$) AS (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "TestA1", "properties": {"name": "A1", "self": {"id": 844424930131969, "label": "TestA1", "properties": {"name": "A1"}}::vertex}}::vertex +(1 row) + +-- Test A2: Multiple variables in SET and RETURN +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA2 {name: 'A'})-[e:LINK {w: 1}]->(b:TestA2 {name: 'B'}) + SET a.edge = e, b.edge = e + RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + a | e | b +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "TestA2", "properties": {"edge": {"id": 1407374883553281, "label": "LINK", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {"w": 1}}::edge, "name": "A"}}::vertex | {"id": 1407374883553281, "label": "LINK", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {"w": 1}}::edge | {"id": 1125899906842626, "label": "TestA2", "properties": {"edge": {"id": 1407374883553281, "label": "LINK", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {"w": 1}}::edge, "name": "B"}}::vertex +(1 row) + +-- Test A3: SET edge property to node reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA3 {name: 'X'})-[e:REL]->(b:TestA3 {name: 'Y'}) + SET e.src = a, e.dst = b + RETURN e +$$) AS (e agtype); + e +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {"id": 1970324836974593, "label": "REL", "end_id": 1688849860263938, "start_id": 1688849860263937, "properties": {"dst": {"id": 1688849860263938, "label": "TestA3", "properties": {"name": "Y"}}::vertex, "src": {"id": 1688849860263937, "label": "TestA3", "properties": {"name": "X"}}::vertex}}::edge +(1 row) + +-- ============================================================================ +-- Test Group B: Nested VERTEX/EDGE/PATH serialization (offset error fix) +-- ============================================================================ +-- Test B1: Vertex nested in vertex property (tests VERTEX serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestB1 {val: 1}) + SET n.copy = n + RETURN n +$$) AS (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 2251799813685249, "label": "TestB1", "properties": {"val": 1, "copy": {"id": 2251799813685249, "label": "TestB1", "properties": {"val": 1}}::vertex}}::vertex +(1 row) + +-- Verify nested vertex can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB1) + RETURN n.copy +$$) AS (copy agtype); + copy +------------------------------------------------------------------------------- + {"id": 2251799813685249, "label": "TestB1", "properties": {"val": 1}}::vertex +(1 row) + +-- Test B2: Edge nested in node property (tests EDGE serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB2 {name: 'start'})-[e:B2REL {x: 100}]->(b:TestB2 {name: 'end'}) + SET a.myEdge = e + RETURN a +$$) AS (a agtype); + a +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 2533274790395905, "label": "TestB2", "properties": {"name": "start", "myEdge": {"id": 2814749767106561, "label": "B2REL", "end_id": 2533274790395906, "start_id": 2533274790395905, "properties": {"x": 100}}::edge}}::vertex +(1 row) + +-- Verify nested edge can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB2 {name: 'start'}) + RETURN n.myEdge +$$) AS (edge agtype); + edge +-------------------------------------------------------------------------------------------------------------------------------------- + {"id": 2814749767106561, "label": "B2REL", "end_id": 2533274790395906, "start_id": 2533274790395905, "properties": {"x": 100}}::edge +(1 row) + +-- Test B3: Path nested in node property (tests PATH serialization) +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB3)-[e:B3REL]->(b:TestB3) + RETURN a +$$) AS (a agtype); + a +----------------------------------------------------------------------- + {"id": 3096224743817217, "label": "TestB3", "properties": {}}::vertex +(1 row) + +-- Then match the path and set it (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestB3)-[e:B3REL]->(b:TestB3) + SET a.myPath = p + RETURN a +$$) AS (a agtype); + a +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 3096224743817217, "label": "TestB3", "properties": {"myPath": [{"id": 3096224743817217, "label": "TestB3", "properties": {}}::vertex, {"id": 3377699720527873, "label": "B3REL", "end_id": 3096224743817218, "start_id": 3096224743817217, "properties": {}}::edge, {"id": 3096224743817218, "label": "TestB3", "properties": {}}::vertex]::path}}::vertex +(1 row) + +-- Verify nested path can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB3) + WHERE n.myPath IS NOT NULL + RETURN n.myPath +$$) AS (path agtype); + path +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + [{"id": 3096224743817217, "label": "TestB3", "properties": {}}::vertex, {"id": 3377699720527873, "label": "B3REL", "end_id": 3096224743817218, "start_id": 3096224743817217, "properties": {}}::edge, {"id": 3096224743817218, "label": "TestB3", "properties": {}}::vertex]::path +(1 row) + +-- ============================================================================ +-- Test Group C: Nested structures in arrays and maps +-- ============================================================================ +-- Test C1: Vertex in array +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC1 {tag: 'arrtest'}) + SET n.arr = [n] + RETURN n +$$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 3659174697238529, "label": "TestC1", "properties": {"arr": [{"id": 3659174697238529, "label": "TestC1", "properties": {"tag": "arrtest"}}::vertex], "tag": "arrtest"}}::vertex +(1 row) + +-- Verify array with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC1) + RETURN n.arr[0] +$$) AS (elem agtype); + elem +--------------------------------------------------------------------------------------- + {"id": 3659174697238529, "label": "TestC1", "properties": {"tag": "arrtest"}}::vertex +(1 row) + +-- Test C2: Vertex in map +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC2 {tag: 'maptest'}) + SET n.obj = {node: n} + RETURN n +$$) AS (result agtype); + result +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 3940649673949185, "label": "TestC2", "properties": {"obj": {"node": {"id": 3940649673949185, "label": "TestC2", "properties": {"tag": "maptest"}}::vertex}, "tag": "maptest"}}::vertex +(1 row) + +-- Verify map with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC2) + RETURN n.obj.node +$$) AS (node agtype); + node +--------------------------------------------------------------------------------------- + {"id": 3940649673949185, "label": "TestC2", "properties": {"tag": "maptest"}}::vertex +(1 row) + +-- ============================================================================ +-- Test Group D: MERGE and CREATE with self-reference +-- ============================================================================ +-- Test D1: MERGE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + MERGE (n:TestD1 {name: 'merged'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + result +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 4222124650659841, "label": "TestD1", "properties": {"ref": {"id": 4222124650659841, "label": "TestD1", "properties": {"name": "merged"}}::vertex, "name": "merged"}}::vertex +(1 row) + +-- Test D2: CREATE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestD2 {name: 'created'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 4503599627370497, "label": "TestD2", "properties": {"ref": {"id": 4503599627370497, "label": "TestD2", "properties": {"name": "created"}}::vertex, "name": "created"}}::vertex +(1 row) + +-- ============================================================================ +-- Test Group E: Functions with variable references +-- ============================================================================ +-- Test E1: id() and label() functions +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestE1 {name: 'functest'}) + SET n.myId = id(n), n.myLabel = label(n) + RETURN n +$$) AS (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------- + {"id": 4785074604081153, "label": "TestE1", "properties": {"myId": 4785074604081153, "name": "functest", "myLabel": "TestE1"}}::vertex +(1 row) + +-- Test E2: nodes() and relationships() with path +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestE2)-[e:E2REL]->(b:TestE2) + RETURN a +$$) AS (a agtype); + a +----------------------------------------------------------------------- + {"id": 5066549580791809, "label": "TestE2", "properties": {}}::vertex +(1 row) + +-- Then match the path and extract nodes/relationships (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestE2)-[e:E2REL]->(b:TestE2) + SET a.pathNodes = nodes(p), a.pathRels = relationships(p) + RETURN a +$$) AS (a agtype); + a +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {"id": 5066549580791809, "label": "TestE2", "properties": {"pathRels": [{"id": 5348024557502465, "label": "E2REL", "end_id": 5066549580791810, "start_id": 5066549580791809, "properties": {}}::edge], "pathNodes": [{"id": 5066549580791809, "label": "TestE2", "properties": {}}::vertex, {"id": 5066549580791810, "label": "TestE2", "properties": {}}::vertex]}}::vertex +(1 row) + -- -- Clean up -- @@ -1038,6 +1277,33 @@ NOTICE: graph "issue_1634" has been dropped (1 row) +SELECT drop_graph('issue_1884', true); +NOTICE: drop cascades to 19 other objects +DETAIL: drop cascades to table issue_1884._ag_label_vertex +drop cascades to table issue_1884._ag_label_edge +drop cascades to table issue_1884."TestA1" +drop cascades to table issue_1884."TestA2" +drop cascades to table issue_1884."LINK" +drop cascades to table issue_1884."TestA3" +drop cascades to table issue_1884."REL" +drop cascades to table issue_1884."TestB1" +drop cascades to table issue_1884."TestB2" +drop cascades to table issue_1884."B2REL" +drop cascades to table issue_1884."TestB3" +drop cascades to table issue_1884."B3REL" +drop cascades to table issue_1884."TestC1" +drop cascades to table issue_1884."TestC2" +drop cascades to table issue_1884."TestD1" +drop cascades to table issue_1884."TestD2" +drop cascades to table issue_1884."TestE1" +drop cascades to table issue_1884."TestE2" +drop cascades to table issue_1884."E2REL" +NOTICE: graph "issue_1884" has been dropped + drop_graph +------------ + +(1 row) + -- -- End -- diff --git a/regress/sql/cypher_set.sql b/regress/sql/cypher_set.sql index a2667153d..e745d5d6e 100644 --- a/regress/sql/cypher_set.sql +++ b/regress/sql/cypher_set.sql @@ -379,6 +379,169 @@ SELECT * FROM cypher('issue_1634', $$ MERGE (v:PERSION {id: '1'}) SELECT * FROM cypher('issue_1634', $$ MATCH (u) DELETE (u) $$) AS (u agtype); +-- +-- Issue 1884: column reference is ambiguous when using same variable in +-- SET expression and RETURN clause +-- +-- These tests cover: +-- 1. "column reference is ambiguous" error when variable is used in both +-- SET expression RHS (e.g., SET n.prop = n) and RETURN clause +-- 2. "Invalid AGT header value" error caused by incorrect offset calculation +-- when nested VERTEX/EDGE/PATH values are serialized in properties +-- +-- Tests use isolated data to keep output manageable and avoid cumulative nesting +-- +SELECT * FROM create_graph('issue_1884'); + +-- ============================================================================ +-- Test Group A: Basic "column reference is ambiguous" fix (Issue 1884) +-- ============================================================================ + +-- Test A1: Core issue - SET n.prop = n with RETURN n (the original bug) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestA1 {name: 'A1'}) + SET n.self = n + RETURN n +$$) AS (result agtype); + +-- Test A2: Multiple variables in SET and RETURN +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA2 {name: 'A'})-[e:LINK {w: 1}]->(b:TestA2 {name: 'B'}) + SET a.edge = e, b.edge = e + RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + +-- Test A3: SET edge property to node reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA3 {name: 'X'})-[e:REL]->(b:TestA3 {name: 'Y'}) + SET e.src = a, e.dst = b + RETURN e +$$) AS (e agtype); + +-- ============================================================================ +-- Test Group B: Nested VERTEX/EDGE/PATH serialization (offset error fix) +-- ============================================================================ + +-- Test B1: Vertex nested in vertex property (tests VERTEX serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestB1 {val: 1}) + SET n.copy = n + RETURN n +$$) AS (result agtype); + +-- Verify nested vertex can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB1) + RETURN n.copy +$$) AS (copy agtype); + +-- Test B2: Edge nested in node property (tests EDGE serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB2 {name: 'start'})-[e:B2REL {x: 100}]->(b:TestB2 {name: 'end'}) + SET a.myEdge = e + RETURN a +$$) AS (a agtype); + +-- Verify nested edge can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB2 {name: 'start'}) + RETURN n.myEdge +$$) AS (edge agtype); + +-- Test B3: Path nested in node property (tests PATH serialization) +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB3)-[e:B3REL]->(b:TestB3) + RETURN a +$$) AS (a agtype); + +-- Then match the path and set it (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestB3)-[e:B3REL]->(b:TestB3) + SET a.myPath = p + RETURN a +$$) AS (a agtype); + +-- Verify nested path can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB3) + WHERE n.myPath IS NOT NULL + RETURN n.myPath +$$) AS (path agtype); + +-- ============================================================================ +-- Test Group C: Nested structures in arrays and maps +-- ============================================================================ + +-- Test C1: Vertex in array +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC1 {tag: 'arrtest'}) + SET n.arr = [n] + RETURN n +$$) AS (result agtype); + +-- Verify array with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC1) + RETURN n.arr[0] +$$) AS (elem agtype); + +-- Test C2: Vertex in map +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC2 {tag: 'maptest'}) + SET n.obj = {node: n} + RETURN n +$$) AS (result agtype); + +-- Verify map with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC2) + RETURN n.obj.node +$$) AS (node agtype); + +-- ============================================================================ +-- Test Group D: MERGE and CREATE with self-reference +-- ============================================================================ + +-- Test D1: MERGE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + MERGE (n:TestD1 {name: 'merged'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + +-- Test D2: CREATE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestD2 {name: 'created'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + +-- ============================================================================ +-- Test Group E: Functions with variable references +-- ============================================================================ + +-- Test E1: id() and label() functions +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestE1 {name: 'functest'}) + SET n.myId = id(n), n.myLabel = label(n) + RETURN n +$$) AS (result agtype); + +-- Test E2: nodes() and relationships() with path +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestE2)-[e:E2REL]->(b:TestE2) + RETURN a +$$) AS (a agtype); + +-- Then match the path and extract nodes/relationships (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestE2)-[e:E2REL]->(b:TestE2) + SET a.pathNodes = nodes(p), a.pathRels = relationships(p) + RETURN a +$$) AS (a agtype); + -- -- Clean up -- @@ -387,6 +550,7 @@ DROP FUNCTION set_test; SELECT drop_graph('cypher_set', true); SELECT drop_graph('cypher_set_1', true); SELECT drop_graph('issue_1634', true); +SELECT drop_graph('issue_1884', true); -- -- End diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index aac6cfb6a..214fdaa49 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -71,6 +71,7 @@ #define AGE_VARNAME_MERGE_CLAUSE AGE_DEFAULT_VARNAME_PREFIX"merge_clause" #define AGE_VARNAME_ID AGE_DEFAULT_VARNAME_PREFIX"id" #define AGE_VARNAME_SET_CLAUSE AGE_DEFAULT_VARNAME_PREFIX"set_clause" +#define AGE_VARNAME_SET_VALUE AGE_DEFAULT_VARNAME_PREFIX"set_value" /* * In the transformation stage, we need to track @@ -1906,10 +1907,24 @@ cypher_update_information *transform_cypher_set_item_list( ((cypher_map*)set_item->expr)->keep_null = set_item->is_add; } - /* create target entry for the new property value */ + /* + * Create target entry for the new property value. + * + * We use a hidden variable name (AGE_VARNAME_SET_VALUE) for the + * SET expression value to prevent column name conflicts. This is + * necessary when the same variable is used on both the LHS and RHS + * of a SET clause (e.g., SET n.prop = n). Without this, the column + * name derived from the expression (e.g., "n") would duplicate the + * existing column name from the MATCH clause, causing a "column + * reference is ambiguous" error in subsequent clauses like RETURN. + * + * The hidden variable name will be filtered out by expand_pnsi_attrs + * when the targetlist is expanded for subsequent clauses. + */ item->prop_position = (AttrNumber)pstate->p_next_resno; target_item = transform_cypher_item(cpstate, set_item->expr, NULL, - EXPR_KIND_SELECT_TARGET, NULL, + EXPR_KIND_SELECT_TARGET, + AGE_VARNAME_SET_VALUE, false); if (nodeTag(target_item->expr) == T_Aggref) diff --git a/src/backend/utils/adt/agtype_ext.c b/src/backend/utils/adt/agtype_ext.c index 8fc6600d1..7a0ea991d 100644 --- a/src/backend/utils/adt/agtype_ext.c +++ b/src/backend/utils/adt/agtype_ext.c @@ -89,7 +89,7 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry, object_ae += pad_buffer_to_int(buffer); *agtentry = AGTENTRY_IS_AGTYPE | - ((AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); + (padlen + (AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); break; } @@ -109,7 +109,7 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry, object_ae += pad_buffer_to_int(buffer); *agtentry = AGTENTRY_IS_AGTYPE | - ((AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); + (padlen + (AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); break; } @@ -129,7 +129,7 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry, object_ae += pad_buffer_to_int(buffer); *agtentry = AGTENTRY_IS_AGTYPE | - ((AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); + (padlen + (AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); break; } @@ -175,7 +175,7 @@ void ag_deserialize_extended_type(char *base_addr, uint32 offset, break; default: - elog(ERROR, "Invalid AGT header value."); + ereport(ERROR, (errmsg("Invalid AGT header value: 0x%08x", agt_header))); } } From 687f5522b697ecf753ab35e175918d5c2d307821 Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Tue, 20 Jan 2026 23:44:19 +0500 Subject: [PATCH 19/20] Add RLS support and fix permission checks (#2309) - Previously, age only set ACL_SELECT and ACL_INSERT in RTEPermissionInfo, bypassing pg's privilege checking for DELETE and UPDATE operations. - Additionally, RLS policies were not enforced because AGE uses CMD_SELECT for all Cypher queries, causing the rewriter to skip RLS policy application. Permission fixes: - Add ACL_DELETE permission flag for DELETE clause operations - Add ACL_UPDATE permission flag for SET/REMOVE clause operations - Recursively search RTEs including subqueries for permission info RLS support: - Implemented at executor level because age transforms all cypher queries to CMD_SELECT, so pg's rewriter never adds RLS policies for INSERT/UPDATE/DELETE operations. There isnt an appropriate rewriter hook to modify this behavior, so we do it in executor instead. - Add setup_wcos() to apply WITH CHECK policies at execution time for CREATE, SET, and MERGE operations - Add setup_security_quals() and check_security_quals() to apply USING policies for UPDATE and DELETE operations - USING policies silently filter rows (matching pg behavior) - WITH CHECK policies raise errors on violation - DETACH DELETE raises error if edge RLS blocks deletion to prevent dangling edges - Add permission checks and rls in startnode/endnode functions - Add regression tests Assisted-by AI --- Makefile | 3 +- regress/expected/security.out | 1657 ++++++++++++++++++++++++++ regress/sql/security.sql | 1451 ++++++++++++++++++++++ src/backend/executor/cypher_create.c | 8 + src/backend/executor/cypher_delete.c | 99 +- src/backend/executor/cypher_merge.c | 10 +- src/backend/executor/cypher_set.c | 89 +- src/backend/executor/cypher_utils.c | 780 ++++++++++++ src/backend/parser/cypher_clause.c | 103 ++ src/backend/utils/adt/agtype.c | 27 +- src/include/executor/cypher_utils.h | 22 + 11 files changed, 4243 insertions(+), 6 deletions(-) create mode 100644 regress/expected/security.out create mode 100644 regress/sql/security.sql diff --git a/Makefile b/Makefile index ffad7d6af..4cd623484 100644 --- a/Makefile +++ b/Makefile @@ -113,7 +113,8 @@ REGRESS = scan \ jsonb_operators \ list_comprehension \ map_projection \ - direct_field_access + direct_field_access \ + security ifneq ($(EXTRA_TESTS),) REGRESS += $(EXTRA_TESTS) diff --git a/regress/expected/security.out b/regress/expected/security.out new file mode 100644 index 000000000..59e58cb05 --- /dev/null +++ b/regress/expected/security.out @@ -0,0 +1,1657 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +LOAD 'age'; +SET search_path TO ag_catalog; +-- +-- Test Privileges +-- +-- +-- Setup: Create test graph and data as superuser +-- +SELECT create_graph('security_test'); +NOTICE: graph "security_test" has been created + create_graph +-------------- + +(1 row) + +-- Create test vertices +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Alice', age: 30}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Bob', age: 25}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'Secret', content: 'classified'}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Create test edges +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document) + CREATE (a)-[:OWNS]->(d) +$$) AS (a agtype); + a +--- +(0 rows) + +-- +-- Create test roles with different permission levels +-- +-- Role with only SELECT (read-only) +CREATE ROLE security_test_readonly LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_readonly; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_readonly; +-- Role with SELECT and INSERT +CREATE ROLE security_test_insert LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_insert; +GRANT SELECT, INSERT ON ALL TABLES IN SCHEMA security_test TO security_test_insert; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_insert; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_insert; +-- Grant sequence usage for ID generation +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_insert; +-- Role with SELECT and UPDATE +CREATE ROLE security_test_update LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_update; +GRANT SELECT, UPDATE ON ALL TABLES IN SCHEMA security_test TO security_test_update; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_update; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_update; +-- Role with SELECT and DELETE +CREATE ROLE security_test_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_delete; +GRANT SELECT, DELETE ON ALL TABLES IN SCHEMA security_test TO security_test_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_delete; +CREATE ROLE security_test_detach_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_detach_delete; +GRANT DELETE ON security_test."Person" TO security_test_detach_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_detach_delete; +-- Role with all permissions +CREATE ROLE security_test_full LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_full; +GRANT ALL ON ALL TABLES IN SCHEMA security_test TO security_test_full; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_full; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_full; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_full; +-- Role with NO SELECT on graph tables (to test read failures) +CREATE ROLE security_test_noread LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noread; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noread; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_noread; +-- No SELECT on security_test tables +-- ============================================================================ +-- PART 1: SELECT Permission Tests - Failure Cases (No Read Permission) +-- ============================================================================ +SET ROLE security_test_noread; +-- Test: MATCH on vertices should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name +$$) AS (name agtype); +ERROR: permission denied for table Person +-- Test: MATCH on edges should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() RETURN k +$$) AS (k agtype); +ERROR: permission denied for table _ag_label_vertex +-- Test: MATCH with path should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a)-[e]->(b) RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); +ERROR: permission denied for table _ag_label_vertex +RESET ROLE; +-- Create role with SELECT only on base label tables, not child labels +-- NOTE: PostgreSQL inheritance allows access to child table rows when querying +-- through a parent table. This is expected behavior - SELECT on _ag_label_vertex +-- allows reading all vertices (including Person, Document) via inheritance. +CREATE ROLE security_test_base_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_base_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_base_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_base_only; +-- Only grant SELECT on base tables, NOT on Person, Document, KNOWS, OWNS +GRANT SELECT ON security_test._ag_label_vertex TO security_test_base_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_base_only; +SET ROLE security_test_base_only; +-- Test: MATCH (n) succeeds because PostgreSQL inheritance allows access to child rows +-- when querying through parent table. Permission on _ag_label_vertex grants read +-- access to all vertices via inheritance hierarchy. +SELECT * FROM cypher('security_test', $$ + MATCH (n) RETURN n +$$) AS (n agtype); + n +------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Person", "properties": {"age": 30, "name": "Alice"}}::vertex + {"id": 844424930131970, "label": "Person", "properties": {"age": 25, "name": "Bob"}}::vertex + {"id": 1125899906842625, "label": "Document", "properties": {"title": "Secret", "content": "classified"}}::vertex +(3 rows) + +-- Test: MATCH ()-[e]->() succeeds via inheritance (same reason as above) +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e]->() RETURN e +$$) AS (e agtype); + e +----------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1407374883553281, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {"since": 2020}}::edge + {"id": 1688849860263937, "label": "OWNS", "end_id": 1125899906842625, "start_id": 844424930131969, "properties": {}}::edge +(2 rows) + +-- ============================================================================ +-- PART 2: SELECT Permission Tests - Success Cases (Read-Only Role) +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: MATCH should succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------- + "Alice" + "Bob" +(2 rows) + +-- Test: MATCH with edges should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person)-[k:KNOWS]->(b:Person) + RETURN a.name, b.name +$$) AS (a agtype, b agtype); + a | b +---------+------- + "Alice" | "Bob" +(1 row) + +-- Test: MATCH across multiple labels should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person)-[:OWNS]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + person | doc +---------+---------- + "Alice" | "Secret" +(1 row) + +-- ============================================================================ +-- PART 3: INSERT Permission Tests (CREATE clause) +-- ============================================================================ +-- Test: CREATE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie'}) +$$) AS (a agtype); +ERROR: permission denied for table Person +-- Test: CREATE edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:FRIENDS]->(b) +$$) AS (a agtype); +ERROR: permission denied for schema security_test +LINE 1: SELECT * FROM cypher('security_test', $$ + ^ +RESET ROLE; +SET ROLE security_test_insert; +-- Test: CREATE vertex should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie', age: 35}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Test: CREATE edge should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Verify the inserts worked +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +-----------+----- + "Charlie" | 35 +(1 row) + +-- ============================================================================ +-- PART 4: UPDATE Permission Tests (SET clause) +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: SET should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p +$$) AS (p agtype); +ERROR: permission denied for table Person +-- Test: SET on edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() + SET k.since = 2021 + RETURN k +$$) AS (k agtype); +ERROR: permission denied for table KNOWS +RESET ROLE; +SET ROLE security_test_update; +-- Test: SET should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Alice" | 31 +(1 row) + +-- Test: SET on edge should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'})-[k:KNOWS]->(b:Person {name: 'Bob'}) + SET k.since = 2019 + RETURN k.since +$$) AS (since agtype); + since +------- + 2019 +(1 row) + +-- Test: SET with map update should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + SET p += {hobby: 'reading'} + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + name | hobby +-------+----------- + "Bob" | "reading" +(1 row) + +-- ============================================================================ +-- PART 5: UPDATE Permission Tests (REMOVE clause) +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: REMOVE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p +$$) AS (p agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_update; +-- Test: REMOVE should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + name | hobby +-------+------- + "Bob" | +(1 row) + +-- ============================================================================ +-- PART 6: DELETE Permission Tests +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: DELETE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_update; +-- Test: DELETE should fail with only UPDATE permission (need DELETE) +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_delete; +-- Test: DELETE vertex should succeed with DELETE permission +-- First delete the edge connected to Charlie +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'})-[k:KNOWS]->() + DELETE k +$$) AS (a agtype); + a +--- +(0 rows) + +-- Now delete the vertex +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p +$$) AS (p agtype); + p +--- +(0 rows) + +-- ============================================================================ +-- PART 7: DETACH DELETE Tests +-- ============================================================================ +RESET ROLE; +-- Create a new vertex with edge for DETACH DELETE test +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Dave', age: 40}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Person {name: 'Dave'}) + CREATE (a)-[:KNOWS {since: 2022}]->(d) +$$) AS (a agtype); + a +--- +(0 rows) + +SET ROLE security_test_detach_delete; +-- Test: DETACH DELETE should fail without DELETE on edge table +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); +ERROR: permission denied for table KNOWS +RESET ROLE; +GRANT DELETE ON security_test."KNOWS" TO security_test_detach_delete; +SET ROLE security_test_detach_delete; +-- Test: DETACH DELETE should succeed now when user has DELETE on both vertex and edge tables +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) RETURN p +$$) AS (p agtype); + p +--- +(0 rows) + +-- ============================================================================ +-- PART 8: MERGE Permission Tests +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: MERGE that would create should fail without INSERT +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p +$$) AS (p agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_insert; +-- Test: MERGE that creates should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve', age: 28}) + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +-------+----- + "Eve" | 28 +(1 row) + +-- Test: MERGE that matches existing should succeed (only needs SELECT) +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p.name +$$) AS (name agtype); + name +------- + "Eve" +(1 row) + +-- ============================================================================ +-- PART 9: Full Permission Role Tests +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_full; +-- Full permission role should be able to do everything +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Frank', age: 50}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + SET p.age = 51 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Frank" | 51 +(1 row) + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 10: Permission on Specific Labels +-- ============================================================================ +RESET ROLE; +-- Create a role with permission only on Person label, not Document +CREATE ROLE security_test_person_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_person_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_person_only; +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA ag_catalog TO security_test_person_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_person_only; +-- Only grant permissions on Person table +GRANT SELECT, INSERT, UPDATE, DELETE ON security_test."Person" TO security_test_person_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_person_only; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_person_only; +SET ROLE security_test_person_only; +-- Test: Operations on Person should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.name +$$) AS (name agtype); + name +--------- + "Alice" +(1 row) + +-- Test: SELECT on Document should fail (no permission) +SELECT * FROM cypher('security_test', $$ + MATCH (d:Document) RETURN d.title +$$) AS (title agtype); +ERROR: permission denied for table Document +-- Test: CREATE Document should fail (no permission on Document table) +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'New Doc'}) +$$) AS (a agtype); +ERROR: permission denied for table Document +-- ============================================================================ +-- PART 11: Function EXECUTE Permission Tests +-- ============================================================================ +RESET ROLE; +-- Create role with no function execute permissions +CREATE ROLE security_test_noexec LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noexec; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noexec; +-- Revoke execute from PUBLIC on functions we want to test +REVOKE EXECUTE ON FUNCTION ag_catalog.create_graph(name) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) FROM PUBLIC; +SET ROLE security_test_noexec; +-- Test: create_graph should fail without EXECUTE permission +SELECT create_graph('unauthorized_graph'); +ERROR: permission denied for function create_graph +-- Test: drop_graph should fail without EXECUTE permission +SELECT drop_graph('security_test', true); +ERROR: permission denied for function drop_graph +-- Test: create_vlabel should fail without EXECUTE permission +SELECT create_vlabel('security_test', 'NewLabel'); +ERROR: permission denied for function create_vlabel +-- Test: create_elabel should fail without EXECUTE permission +SELECT create_elabel('security_test', 'NewEdge'); +ERROR: permission denied for function create_elabel +RESET ROLE; +-- Grant execute on specific function and test +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO security_test_noexec; +SET ROLE security_test_noexec; +-- Test: create_vlabel should now get past execute check (will fail on schema permission instead) +SELECT create_vlabel('security_test', 'TestLabel'); +ERROR: permission denied for schema security_test +-- Test: create_graph should still fail with execute permission denied +SELECT create_graph('unauthorized_graph'); +ERROR: permission denied for function create_graph +RESET ROLE; +-- Restore execute permissions to PUBLIC +GRANT EXECUTE ON FUNCTION ag_catalog.create_graph(name) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) TO PUBLIC; +-- ============================================================================ +-- PART 12: startNode/endNode Permission Tests +-- ============================================================================ +-- Create role with SELECT on base tables but NOT on Person label +CREATE ROLE security_test_edge_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_edge_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_edge_only; +-- Note: NOT granting SELECT on security_test."Person" +SET ROLE security_test_edge_only; +-- Test: endNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN endNode(e) +$$) AS (end_vertex agtype); +ERROR: permission denied for table Person +-- Test: startNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e) +$$) AS (start_vertex agtype); +ERROR: permission denied for table Person +RESET ROLE; +-- Grant SELECT on Person and verify success +GRANT SELECT ON security_test."Person" TO security_test_edge_only; +SET ROLE security_test_edge_only; +-- Test: Should now succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + start_name | end_name +------------+---------- + "Alice" | "Bob" +(1 row) + +RESET ROLE; +-- ============================================================================ +-- Cleanup +-- ============================================================================ +RESET ROLE; +-- Drop all owned objects and privileges for each role, then drop the role +DROP OWNED BY security_test_noread CASCADE; +DROP ROLE security_test_noread; +DROP OWNED BY security_test_base_only CASCADE; +DROP ROLE security_test_base_only; +DROP OWNED BY security_test_readonly CASCADE; +DROP ROLE security_test_readonly; +DROP OWNED BY security_test_insert CASCADE; +DROP ROLE security_test_insert; +DROP OWNED BY security_test_update CASCADE; +DROP ROLE security_test_update; +DROP OWNED BY security_test_delete CASCADE; +DROP ROLE security_test_delete; +DROP OWNED BY security_test_detach_delete CASCADE; +DROP ROLE security_test_detach_delete; +DROP OWNED BY security_test_full CASCADE; +DROP ROLE security_test_full; +DROP OWNED BY security_test_person_only CASCADE; +DROP ROLE security_test_person_only; +DROP OWNED BY security_test_noexec CASCADE; +DROP ROLE security_test_noexec; +DROP OWNED BY security_test_edge_only CASCADE; +DROP ROLE security_test_edge_only; +-- Drop test graph +SELECT drop_graph('security_test', true); +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table security_test._ag_label_vertex +drop cascades to table security_test._ag_label_edge +drop cascades to table security_test."Person" +drop cascades to table security_test."Document" +drop cascades to table security_test."KNOWS" +drop cascades to table security_test."OWNS" +NOTICE: graph "security_test" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- Row-Level Security (RLS) Tests +-- +-- +-- Setup: Create test graph, data and roles for RLS tests +-- +SELECT create_graph('rls_graph'); +NOTICE: graph "rls_graph" has been created + create_graph +-------------- + +(1 row) + +-- Create test roles +CREATE ROLE rls_user1 LOGIN; +CREATE ROLE rls_user2 LOGIN; +CREATE ROLE rls_admin LOGIN BYPASSRLS; -- Role that bypasses RLS +-- Create base test data FIRST (as superuser) - this creates the label tables +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Alice', owner: 'rls_user1', department: 'Engineering', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Bob', owner: 'rls_user2', department: 'Engineering', level: 2}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Charlie', owner: 'rls_user1', department: 'Sales', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Diana', owner: 'rls_user2', department: 'Sales', level: 3}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Create a second vertex label for multi-label tests +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Public Doc', classification: 'public', owner: 'rls_user1'}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Secret Doc', classification: 'secret', owner: 'rls_user2'}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Create edges +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020, strength: 'weak'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2021, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2022, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document {title: 'Public Doc'}) + CREATE (a)-[:AUTHORED]->(d) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Grant permissions AFTER creating tables (so Person, Document, KNOWS, AUTHORED exist) +GRANT USAGE ON SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT ALL ON ALL TABLES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON SCHEMA ag_catalog TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +-- ============================================================================ +-- PART 1: Vertex SELECT Policies (USING clause) +-- ============================================================================ +-- Enable RLS on Person label +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; +-- 1.1: Basic ownership filtering +CREATE POLICY person_select_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); +-- Test as rls_user1 - should only see Alice and Charlie (owned by rls_user1) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Charlie" +(2 rows) + +-- Test as rls_user2 - should only see Bob and Diana (owned by rls_user2) +SET ROLE rls_user2; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------- + "Bob" + "Diana" +(2 rows) + +RESET ROLE; +-- 1.2: Default deny - no permissive policies means no access +DROP POLICY person_select_own ON rls_graph."Person"; +-- With no policies, RLS blocks all access +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +------ +(0 rows) + +RESET ROLE; +-- ============================================================================ +-- PART 2: Vertex INSERT Policies (WITH CHECK) - CREATE +-- ============================================================================ +-- Allow SELECT for all (so we can verify results) +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +-- 2.1: Basic WITH CHECK - users can only insert rows they own +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); +-- Test as rls_user1 - should succeed (owner matches current_user) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Created', owner: 'rls_user1', department: 'Test', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Test as rls_user1 - should FAIL (owner doesn't match current_user) +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Fake', owner: 'rls_user2', department: 'Test', level: 1}) +$$) AS (a agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- Verify only User1Created was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +---------------- + "User1Created" +(1 row) + +-- 2.2: Default deny for INSERT - no INSERT policy blocks all inserts +DROP POLICY person_insert_own ON rls_graph."Person"; +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'ShouldFail', owner: 'rls_user1', department: 'Blocked', level: 1}) +$$) AS (a agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- Verify nothing was created in Blocked department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Blocked' RETURN p.name +$$) AS (name agtype); + name +------ +(0 rows) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 3: Vertex UPDATE Policies - SET +-- ============================================================================ +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +-- 3.1: USING clause only - filter which rows can be updated +CREATE POLICY person_update_using ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user); +SET ROLE rls_user1; +-- Should succeed - rls_user1 owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + name | updated +---------+--------- + "Alice" | true +(1 row) + +-- Should silently skip - rls_user1 doesn't own Bob (USING filters it out) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + name | updated +------+--------- +(0 rows) + +RESET ROLE; +-- Verify Alice was updated, Bob was not +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] RETURN p.name, p.updated ORDER BY p.name +$$) AS (name agtype, updated agtype); + name | updated +---------+--------- + "Alice" | true + "Bob" | +(2 rows) + +-- 3.2: WITH CHECK clause - validate new values +DROP POLICY person_update_using ON rls_graph."Person"; +CREATE POLICY person_update_check ON rls_graph."Person" + FOR UPDATE + USING (true) -- Can update any row + WITH CHECK (properties->>'"owner"' = current_user); -- But new value must keep owner +SET ROLE rls_user1; +-- Should succeed - modifying property but keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.verified = true RETURN p.name, p.verified +$$) AS (name agtype, verified agtype); + name | verified +---------+---------- + "Alice" | true +(1 row) + +-- Should FAIL - trying to change owner to someone else +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.owner = 'rls_user2' RETURN p.owner +$$) AS (owner agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- Verify owner wasn't changed +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.owner +$$) AS (owner agtype); + owner +------------- + "rls_user1" +(1 row) + +-- 3.3: Both USING and WITH CHECK together +DROP POLICY person_update_check ON rls_graph."Person"; +CREATE POLICY person_update_both ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); +SET ROLE rls_user1; +-- Should succeed - owns Alice, keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + name | status +---------+---------- + "Alice" | "active" +(1 row) + +-- Should silently skip - doesn't own Bob (USING filters) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + name | status +------+-------- +(0 rows) + +RESET ROLE; +-- ============================================================================ +-- PART 4: Vertex UPDATE Policies - REMOVE +-- ============================================================================ +-- Keep existing update policy, test REMOVE operation +SET ROLE rls_user1; +-- Should succeed - owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) REMOVE p.status RETURN p.name, p.status +$$) AS (name agtype, status agtype); + name | status +---------+-------- + "Alice" | +(1 row) + +-- Should silently skip - doesn't own Bob +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) REMOVE p.department RETURN p.name, p.department +$$) AS (name agtype, dept agtype); + name | dept +------+------ +(0 rows) + +RESET ROLE; +-- Verify Bob still has department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) RETURN p.department +$$) AS (dept agtype); + dept +--------------- + "Engineering" +(1 row) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_update_both ON rls_graph."Person"; +-- ============================================================================ +-- PART 5: Vertex DELETE Policies +-- ============================================================================ +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +-- Create test data for delete tests +CREATE POLICY person_insert_all ON rls_graph."Person" + FOR INSERT WITH CHECK (true); +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest1', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest2', owner: 'rls_user2', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest3', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +DROP POLICY person_insert_all ON rls_graph."Person"; +-- 5.1: Basic USING filtering for DELETE +CREATE POLICY person_delete_own ON rls_graph."Person" + FOR DELETE + USING (properties->>'"owner"' = current_user); +SET ROLE rls_user1; +-- Should succeed - owns DeleteTest1 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest1'}) DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- Should silently skip - doesn't own DeleteTest2 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest2'}) DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify DeleteTest1 deleted, DeleteTest2 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------------- + "DeleteTest2" + "DeleteTest3" +(2 rows) + +-- 5.2: Default deny for DELETE - no policy blocks all deletes +DROP POLICY person_delete_own ON rls_graph."Person"; +SET ROLE rls_user1; +-- Should silently skip - no DELETE policy means default deny +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify DeleteTest3 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) RETURN p.name +$$) AS (name agtype); + name +--------------- + "DeleteTest3" +(1 row) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 6: MERGE Policies +-- ============================================================================ +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); +-- 6.1: MERGE creating new vertex - INSERT policy applies +SET ROLE rls_user1; +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew1', owner: 'rls_user1', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); + name +------------- + "MergeNew1" +(1 row) + +-- Should FAIL - creating with wrong owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew2', owner: 'rls_user2', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- 6.2: MERGE matching existing - only SELECT needed +SET ROLE rls_user1; +-- Should succeed - Alice exists and SELECT allowed +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'Alice'}) + RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + name | owner +---------+------------- + "Alice" | "rls_user1" +(1 row) + +RESET ROLE; +-- Verify only MergeNew1 was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +------------- + "MergeNew1" +(1 row) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_insert_own ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 7: Edge SELECT Policies +-- ============================================================================ +-- Disable vertex RLS, enable edge RLS +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" FORCE ROW LEVEL SECURITY; +-- Policy: Only see edges from 2021 or later +CREATE POLICY knows_select_recent ON rls_graph."KNOWS" + FOR SELECT + USING ((properties->>'"since"')::int >= 2021); +SET ROLE rls_user1; +-- Should only see 2021 and 2022 edges (not 2020) +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() RETURN k.since ORDER BY k.since +$$) AS (since agtype); + since +------- + 2021 + 2022 +(2 rows) + +RESET ROLE; +-- ============================================================================ +-- PART 8: Edge INSERT Policies (CREATE edge) +-- ============================================================================ +DROP POLICY knows_select_recent ON rls_graph."KNOWS"; +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); +-- Policy: Can only create edges with strength = 'strong' +CREATE POLICY knows_insert_strong ON rls_graph."KNOWS" + FOR INSERT + WITH CHECK (properties->>'"strength"' = 'strong'); +SET ROLE rls_user1; +-- Should succeed - strength is 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Should FAIL - strength is 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'weak'}]->(b) +$$) AS (a agtype); +ERROR: new row violates row-level security policy for table "KNOWS" +RESET ROLE; +-- Verify only strong edge was created +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since = 2023 RETURN k.strength ORDER BY k.strength +$$) AS (strength agtype); + strength +---------- + "strong" +(1 row) + +-- cleanup +DROP POLICY knows_insert_strong ON rls_graph."KNOWS"; +-- ============================================================================ +-- PART 9: Edge UPDATE Policies (SET on edge) +-- ============================================================================ +-- Policy: Can only update edges with strength = 'strong' +CREATE POLICY knows_update_strong ON rls_graph."KNOWS" + FOR UPDATE + USING (properties->>'"strength"' = 'strong') + WITH CHECK (properties->>'"strength"' = 'strong'); +SET ROLE rls_user1; +-- Should succeed - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2021}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + since | notes +-------+----------- + 2021 | "updated" +(1 row) + +-- Should silently skip - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2020}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + since | notes +-------+------- +(0 rows) + +RESET ROLE; +-- Verify only 2021 edge was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2020, 2021] RETURN k.since, k.notes ORDER BY k.since +$$) AS (since agtype, notes agtype); + since | notes +-------+----------- + 2020 | + 2021 | "updated" +(2 rows) + +-- cleanup +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_update_strong ON rls_graph."KNOWS"; +-- ============================================================================ +-- PART 10: Edge DELETE Policies +-- ============================================================================ +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); +-- Create test edges for delete +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2018, strength: 'weak'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2019, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; +-- Policy: Can only delete edges with strength = 'weak' +CREATE POLICY knows_delete_weak ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' = 'weak'); +SET ROLE rls_user1; +-- Should succeed - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2018}]->() DELETE k +$$) AS (a agtype); + a +--- +(0 rows) + +-- Should silently skip - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2019}]->() DELETE k +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify 2018 edge deleted, 2019 edge still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2018, 2019] RETURN k.since ORDER BY k.since +$$) AS (since agtype); + since +------- + 2019 +(1 row) + +-- cleanup +DROP POLICY knows_delete_weak ON rls_graph."KNOWS"; +-- ============================================================================ +-- PART 11: DETACH DELETE +-- ============================================================================ +-- Re-enable Person RLS +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +CREATE POLICY person_all ON rls_graph."Person" + FOR ALL USING (true) WITH CHECK (true); +-- Create test data with a protected edge +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest1', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest2', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'DetachTest1'}), (b:Person {name: 'DetachTest2'}) + CREATE (a)-[:KNOWS {since: 2010, strength: 'protected'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; +-- Policy: Cannot delete edges with strength = 'protected' +CREATE POLICY knows_delete_not_protected ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' != 'protected'); +SET ROLE rls_user1; +-- Should ERROR - DETACH DELETE cannot silently skip (would leave dangling edge) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) DETACH DELETE p +$$) AS (a agtype); +ERROR: cannot delete edge due to row-level security policy on "KNOWS" +HINT: DETACH DELETE requires permission to delete all connected edges. +RESET ROLE; +-- Verify vertex still exists (delete was blocked) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) RETURN p.name +$$) AS (name agtype); + name +--------------- + "DetachTest1" +(1 row) + +-- cleanup +DROP POLICY person_all ON rls_graph."Person"; +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_delete_not_protected ON rls_graph."KNOWS"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Detach' DETACH DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 12: Multiple Labels in Single Query +-- ============================================================================ +-- Enable RLS on Document too +ALTER TABLE rls_graph."Document" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" FORCE ROW LEVEL SECURITY; +-- Policy: Users see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); +-- Policy: Users see only public documents +CREATE POLICY doc_public ON rls_graph."Document" + FOR SELECT + USING (properties->>'"classification"' = 'public'); +SET ROLE rls_user1; +-- Should only see Alice and Charlie (Person) with Public Doc (Document) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Charlie" +(2 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (d:Document) RETURN d.title ORDER BY d.title +$$) AS (title agtype); + title +-------------- + "Public Doc" +(1 row) + +-- Combined query - should respect both policies +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person)-[:AUTHORED]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + person | doc +---------+-------------- + "Alice" | "Public Doc" +(1 row) + +RESET ROLE; +-- ============================================================================ +-- PART 13: Permissive vs Restrictive Policies +-- ============================================================================ +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY doc_public ON rls_graph."Document"; +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; +-- 13.1: Multiple permissive policies (OR logic) +CREATE POLICY person_permissive_own ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"owner"' = current_user); +CREATE POLICY person_permissive_eng ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"department"' = 'Engineering'); +SET ROLE rls_user1; +-- Should see: Alice (own), Charlie (own), Bob (Engineering) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department IN ['Engineering', 'Sales'] + RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" +(3 rows) + +RESET ROLE; +-- 13.2: Add restrictive policy (AND with permissive) +CREATE POLICY person_restrictive_level ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING ((properties->>'"level"')::int <= 2); +SET ROLE rls_user1; +-- Should see: Alice (own, level 1), Bob (Engineering, level 2), Charlie (own, level 1) +-- Diana (level 3) blocked by restrictive +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name, p.level ORDER BY p.name +$$) AS (name agtype, level agtype); + name | level +-----------+------- + "Alice" | 1 + "Bob" | 2 + "Charlie" | 1 +(3 rows) + +RESET ROLE; +-- 13.3: Multiple restrictive policies (all must pass) +CREATE POLICY person_restrictive_sales ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING (properties->>'"department"' != 'Sales'); +SET ROLE rls_user1; +-- Should see: Alice (own, level 1, not Sales), Bob (Engineering, level 2, not Sales) +-- Charlie blocked by Sales restriction +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------- + "Alice" + "Bob" +(2 rows) + +RESET ROLE; +-- ============================================================================ +-- PART 14: BYPASSRLS Role and Superuser Behavior +-- ============================================================================ +DROP POLICY person_permissive_own ON rls_graph."Person"; +DROP POLICY person_permissive_eng ON rls_graph."Person"; +DROP POLICY person_restrictive_level ON rls_graph."Person"; +DROP POLICY person_restrictive_sales ON rls_graph."Person"; +-- Restrictive policy that blocks most access +CREATE POLICY person_very_restrictive ON rls_graph."Person" + FOR SELECT + USING (properties->>'"name"' = 'Nobody'); +-- 14.1: Regular user sees nothing +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +------ +(0 rows) + +RESET ROLE; +-- 14.2: BYPASSRLS role sees everything +SET ROLE rls_admin; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Diana" +(4 rows) + +RESET ROLE; +-- 14.3: Superuser sees everything (implicit bypass) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Diana" +(4 rows) + +-- ============================================================================ +-- PART 15: Complex Multi-Operation Queries +-- ============================================================================ +DROP POLICY person_very_restrictive ON rls_graph."Person"; +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); +CREATE POLICY person_update_own ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); +-- 15.1: MATCH + CREATE in one query +SET ROLE rls_user1; +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS]->(:Person {name: 'NewFromMatch', owner: 'rls_user1', department: 'Complex', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify creation +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'NewFromMatch'}) RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + name | owner +----------------+------------- + "NewFromMatch" | "rls_user1" +(1 row) + +-- 15.2: MATCH + SET in one query +SET ROLE rls_user1; +-- Should succeed on Alice (own), skip Bob (not own) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + SET p.complexTest = true + RETURN p.name, p.complexTest +$$) AS (name agtype, test agtype); + name | test +---------+------ + "Alice" | true +(1 row) + +RESET ROLE; +-- Verify only Alice was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + RETURN p.name, p.complexTest ORDER BY p.name +$$) AS (name agtype, test agtype); + name | test +---------+------ + "Alice" | true + "Bob" | +(2 rows) + +-- cleanup +DROP POLICY IF EXISTS person_select_all ON rls_graph."Person"; +DROP POLICY IF EXISTS person_insert_own ON rls_graph."Person"; +DROP POLICY IF EXISTS person_update_own ON rls_graph."Person"; +-- ============================================================================ +-- PART 16: startNode/endNode RLS Enforcement +-- ============================================================================ +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +-- Enable RLS on Person with restrictive policy +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; +-- Policy: users can only see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); +-- Enable edge access for testing +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +CREATE POLICY knows_all ON rls_graph."KNOWS" + FOR SELECT USING (true); +-- 16.1: startNode blocked by RLS - should error +SET ROLE rls_user1; +-- rls_user1 can see the edge (Alice->Bob) but cannot see Bob (owned by rls_user2) +-- endNode should error because Bob is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(b) + RETURN endNode(e) +$$) AS (end_vertex agtype); +ERROR: access to vertex 844424930131970 denied by row-level security policy on "Person" +-- 16.2: endNode blocked by RLS - should error +-- rls_user1 cannot see Bob, so startNode on an edge starting from Bob should error +SET ROLE rls_user2; +-- rls_user2 can see Bob but not Alice (owned by rls_user1) +-- startNode should error because Alice is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a)-[e:KNOWS]->(b:Person {name: 'Bob'}) + RETURN startNode(e) +$$) AS (start_vertex agtype); +ERROR: access to vertex 844424930131969 denied by row-level security policy on "Person" +-- 16.3: startNode/endNode succeed when RLS allows access +SET ROLE rls_user1; +-- Alice->Charlie edge: rls_user1 owns both, should succeed +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(c:Person {name: 'Charlie'}) + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + start_name | end_name +------------+----------- + "Alice" | "Charlie" +(1 row) + +RESET ROLE; +-- cleanup +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY knows_all ON rls_graph."KNOWS"; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; +-- ============================================================================ +-- RLS CLEANUP +-- ============================================================================ +RESET ROLE; +-- Disable RLS on all tables +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; +-- Drop roles +DROP OWNED BY rls_user1 CASCADE; +DROP ROLE rls_user1; +DROP OWNED BY rls_user2 CASCADE; +DROP ROLE rls_user2; +DROP OWNED BY rls_admin CASCADE; +DROP ROLE rls_admin; +-- Drop test graph +SELECT drop_graph('rls_graph', true); +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table rls_graph._ag_label_vertex +drop cascades to table rls_graph._ag_label_edge +drop cascades to table rls_graph."Person" +drop cascades to table rls_graph."Document" +drop cascades to table rls_graph."KNOWS" +drop cascades to table rls_graph."AUTHORED" +NOTICE: graph "rls_graph" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/security.sql b/regress/sql/security.sql new file mode 100644 index 000000000..344dd23d4 --- /dev/null +++ b/regress/sql/security.sql @@ -0,0 +1,1451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +-- +-- Test Privileges +-- + +-- +-- Setup: Create test graph and data as superuser +-- +SELECT create_graph('security_test'); + +-- Create test vertices +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Alice', age: 30}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Bob', age: 25}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'Secret', content: 'classified'}) +$$) AS (a agtype); + +-- Create test edges +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document) + CREATE (a)-[:OWNS]->(d) +$$) AS (a agtype); + +-- +-- Create test roles with different permission levels +-- + +-- Role with only SELECT (read-only) +CREATE ROLE security_test_readonly LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_readonly; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_readonly; + +-- Role with SELECT and INSERT +CREATE ROLE security_test_insert LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_insert; +GRANT SELECT, INSERT ON ALL TABLES IN SCHEMA security_test TO security_test_insert; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_insert; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_insert; +-- Grant sequence usage for ID generation +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_insert; + +-- Role with SELECT and UPDATE +CREATE ROLE security_test_update LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_update; +GRANT SELECT, UPDATE ON ALL TABLES IN SCHEMA security_test TO security_test_update; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_update; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_update; + +-- Role with SELECT and DELETE +CREATE ROLE security_test_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_delete; +GRANT SELECT, DELETE ON ALL TABLES IN SCHEMA security_test TO security_test_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_delete; + +CREATE ROLE security_test_detach_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_detach_delete; +GRANT DELETE ON security_test."Person" TO security_test_detach_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_detach_delete; + +-- Role with all permissions +CREATE ROLE security_test_full LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_full; +GRANT ALL ON ALL TABLES IN SCHEMA security_test TO security_test_full; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_full; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_full; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_full; + +-- Role with NO SELECT on graph tables (to test read failures) +CREATE ROLE security_test_noread LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noread; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noread; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_noread; +-- No SELECT on security_test tables + +-- ============================================================================ +-- PART 1: SELECT Permission Tests - Failure Cases (No Read Permission) +-- ============================================================================ + +SET ROLE security_test_noread; + +-- Test: MATCH on vertices should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name +$$) AS (name agtype); + +-- Test: MATCH on edges should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() RETURN k +$$) AS (k agtype); + +-- Test: MATCH with path should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a)-[e]->(b) RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + +RESET ROLE; + +-- Create role with SELECT only on base label tables, not child labels +-- NOTE: PostgreSQL inheritance allows access to child table rows when querying +-- through a parent table. This is expected behavior - SELECT on _ag_label_vertex +-- allows reading all vertices (including Person, Document) via inheritance. +CREATE ROLE security_test_base_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_base_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_base_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_base_only; +-- Only grant SELECT on base tables, NOT on Person, Document, KNOWS, OWNS +GRANT SELECT ON security_test._ag_label_vertex TO security_test_base_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_base_only; + +SET ROLE security_test_base_only; + +-- Test: MATCH (n) succeeds because PostgreSQL inheritance allows access to child rows +-- when querying through parent table. Permission on _ag_label_vertex grants read +-- access to all vertices via inheritance hierarchy. +SELECT * FROM cypher('security_test', $$ + MATCH (n) RETURN n +$$) AS (n agtype); + +-- Test: MATCH ()-[e]->() succeeds via inheritance (same reason as above) +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e]->() RETURN e +$$) AS (e agtype); + +-- ============================================================================ +-- PART 2: SELECT Permission Tests - Success Cases (Read-Only Role) +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: MATCH should succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- Test: MATCH with edges should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person)-[k:KNOWS]->(b:Person) + RETURN a.name, b.name +$$) AS (a agtype, b agtype); + +-- Test: MATCH across multiple labels should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person)-[:OWNS]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + +-- ============================================================================ +-- PART 3: INSERT Permission Tests (CREATE clause) +-- ============================================================================ + +-- Test: CREATE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie'}) +$$) AS (a agtype); + +-- Test: CREATE edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:FRIENDS]->(b) +$$) AS (a agtype); + +RESET ROLE; +SET ROLE security_test_insert; + +-- Test: CREATE vertex should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie', age: 35}) +$$) AS (a agtype); + +-- Test: CREATE edge should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023}]->(b) +$$) AS (a agtype); + +-- Verify the inserts worked +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +-- ============================================================================ +-- PART 4: UPDATE Permission Tests (SET clause) +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: SET should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p +$$) AS (p agtype); + +-- Test: SET on edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() + SET k.since = 2021 + RETURN k +$$) AS (k agtype); + +RESET ROLE; +SET ROLE security_test_update; + +-- Test: SET should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +-- Test: SET on edge should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'})-[k:KNOWS]->(b:Person {name: 'Bob'}) + SET k.since = 2019 + RETURN k.since +$$) AS (since agtype); + +-- Test: SET with map update should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + SET p += {hobby: 'reading'} + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + +-- ============================================================================ +-- PART 5: UPDATE Permission Tests (REMOVE clause) +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: REMOVE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p +$$) AS (p agtype); + +RESET ROLE; +SET ROLE security_test_update; + +-- Test: REMOVE should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + +-- ============================================================================ +-- PART 6: DELETE Permission Tests +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: DELETE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + +RESET ROLE; +SET ROLE security_test_update; + +-- Test: DELETE should fail with only UPDATE permission (need DELETE) +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + +RESET ROLE; +SET ROLE security_test_delete; + +-- Test: DELETE vertex should succeed with DELETE permission +-- First delete the edge connected to Charlie +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'})-[k:KNOWS]->() + DELETE k +$$) AS (a agtype); + +-- Now delete the vertex +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p +$$) AS (p agtype); + +-- ============================================================================ +-- PART 7: DETACH DELETE Tests +-- ============================================================================ + +RESET ROLE; + +-- Create a new vertex with edge for DETACH DELETE test +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Dave', age: 40}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Person {name: 'Dave'}) + CREATE (a)-[:KNOWS {since: 2022}]->(d) +$$) AS (a agtype); + +SET ROLE security_test_detach_delete; + +-- Test: DETACH DELETE should fail without DELETE on edge table +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); + +RESET ROLE; +GRANT DELETE ON security_test."KNOWS" TO security_test_detach_delete; +SET ROLE security_test_detach_delete; + +-- Test: DETACH DELETE should succeed now when user has DELETE on both vertex and edge tables +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) RETURN p +$$) AS (p agtype); + +-- ============================================================================ +-- PART 8: MERGE Permission Tests +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: MERGE that would create should fail without INSERT +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p +$$) AS (p agtype); + +RESET ROLE; +SET ROLE security_test_insert; + +-- Test: MERGE that creates should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve', age: 28}) + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +-- Test: MERGE that matches existing should succeed (only needs SELECT) +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p.name +$$) AS (name agtype); + +-- ============================================================================ +-- PART 9: Full Permission Role Tests +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_full; + +-- Full permission role should be able to do everything +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Frank', age: 50}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + SET p.age = 51 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 10: Permission on Specific Labels +-- ============================================================================ + +RESET ROLE; + +-- Create a role with permission only on Person label, not Document +CREATE ROLE security_test_person_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_person_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_person_only; +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA ag_catalog TO security_test_person_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_person_only; +-- Only grant permissions on Person table +GRANT SELECT, INSERT, UPDATE, DELETE ON security_test."Person" TO security_test_person_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_person_only; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_person_only; + +SET ROLE security_test_person_only; + +-- Test: Operations on Person should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.name +$$) AS (name agtype); + +-- Test: SELECT on Document should fail (no permission) +SELECT * FROM cypher('security_test', $$ + MATCH (d:Document) RETURN d.title +$$) AS (title agtype); + +-- Test: CREATE Document should fail (no permission on Document table) +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'New Doc'}) +$$) AS (a agtype); + +-- ============================================================================ +-- PART 11: Function EXECUTE Permission Tests +-- ============================================================================ + +RESET ROLE; + +-- Create role with no function execute permissions +CREATE ROLE security_test_noexec LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noexec; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noexec; + +-- Revoke execute from PUBLIC on functions we want to test +REVOKE EXECUTE ON FUNCTION ag_catalog.create_graph(name) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) FROM PUBLIC; + +SET ROLE security_test_noexec; + +-- Test: create_graph should fail without EXECUTE permission +SELECT create_graph('unauthorized_graph'); + +-- Test: drop_graph should fail without EXECUTE permission +SELECT drop_graph('security_test', true); + +-- Test: create_vlabel should fail without EXECUTE permission +SELECT create_vlabel('security_test', 'NewLabel'); + +-- Test: create_elabel should fail without EXECUTE permission +SELECT create_elabel('security_test', 'NewEdge'); + +RESET ROLE; + +-- Grant execute on specific function and test +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO security_test_noexec; + +SET ROLE security_test_noexec; + +-- Test: create_vlabel should now get past execute check (will fail on schema permission instead) +SELECT create_vlabel('security_test', 'TestLabel'); + +-- Test: create_graph should still fail with execute permission denied +SELECT create_graph('unauthorized_graph'); + +RESET ROLE; + +-- Restore execute permissions to PUBLIC +GRANT EXECUTE ON FUNCTION ag_catalog.create_graph(name) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) TO PUBLIC; + +-- ============================================================================ +-- PART 12: startNode/endNode Permission Tests +-- ============================================================================ + +-- Create role with SELECT on base tables but NOT on Person label +CREATE ROLE security_test_edge_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_edge_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_edge_only; +-- Note: NOT granting SELECT on security_test."Person" + +SET ROLE security_test_edge_only; + +-- Test: endNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN endNode(e) +$$) AS (end_vertex agtype); + +-- Test: startNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e) +$$) AS (start_vertex agtype); + +RESET ROLE; + +-- Grant SELECT on Person and verify success +GRANT SELECT ON security_test."Person" TO security_test_edge_only; + +SET ROLE security_test_edge_only; + +-- Test: Should now succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + +RESET ROLE; + +-- ============================================================================ +-- Cleanup +-- ============================================================================ + +RESET ROLE; + +-- Drop all owned objects and privileges for each role, then drop the role +DROP OWNED BY security_test_noread CASCADE; +DROP ROLE security_test_noread; + +DROP OWNED BY security_test_base_only CASCADE; +DROP ROLE security_test_base_only; + +DROP OWNED BY security_test_readonly CASCADE; +DROP ROLE security_test_readonly; + +DROP OWNED BY security_test_insert CASCADE; +DROP ROLE security_test_insert; + +DROP OWNED BY security_test_update CASCADE; +DROP ROLE security_test_update; + +DROP OWNED BY security_test_delete CASCADE; +DROP ROLE security_test_delete; + +DROP OWNED BY security_test_detach_delete CASCADE; +DROP ROLE security_test_detach_delete; + +DROP OWNED BY security_test_full CASCADE; +DROP ROLE security_test_full; + +DROP OWNED BY security_test_person_only CASCADE; +DROP ROLE security_test_person_only; + +DROP OWNED BY security_test_noexec CASCADE; +DROP ROLE security_test_noexec; + +DROP OWNED BY security_test_edge_only CASCADE; +DROP ROLE security_test_edge_only; + +-- Drop test graph +SELECT drop_graph('security_test', true); + +-- +-- Row-Level Security (RLS) Tests +-- + +-- +-- Setup: Create test graph, data and roles for RLS tests +-- +SELECT create_graph('rls_graph'); + +-- Create test roles +CREATE ROLE rls_user1 LOGIN; +CREATE ROLE rls_user2 LOGIN; +CREATE ROLE rls_admin LOGIN BYPASSRLS; -- Role that bypasses RLS + +-- Create base test data FIRST (as superuser) - this creates the label tables +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Alice', owner: 'rls_user1', department: 'Engineering', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Bob', owner: 'rls_user2', department: 'Engineering', level: 2}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Charlie', owner: 'rls_user1', department: 'Sales', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Diana', owner: 'rls_user2', department: 'Sales', level: 3}) +$$) AS (a agtype); + +-- Create a second vertex label for multi-label tests +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Public Doc', classification: 'public', owner: 'rls_user1'}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Secret Doc', classification: 'secret', owner: 'rls_user2'}) +$$) AS (a agtype); + +-- Create edges +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020, strength: 'weak'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2021, strength: 'strong'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2022, strength: 'strong'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document {title: 'Public Doc'}) + CREATE (a)-[:AUTHORED]->(d) +$$) AS (a agtype); + +-- Grant permissions AFTER creating tables (so Person, Document, KNOWS, AUTHORED exist) +GRANT USAGE ON SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT ALL ON ALL TABLES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON SCHEMA ag_catalog TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; + +-- ============================================================================ +-- PART 1: Vertex SELECT Policies (USING clause) +-- ============================================================================ + +-- Enable RLS on Person label +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; + +-- 1.1: Basic ownership filtering +CREATE POLICY person_select_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); + +-- Test as rls_user1 - should only see Alice and Charlie (owned by rls_user1) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- Test as rls_user2 - should only see Bob and Diana (owned by rls_user2) +SET ROLE rls_user2; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 1.2: Default deny - no permissive policies means no access +DROP POLICY person_select_own ON rls_graph."Person"; + +-- With no policies, RLS blocks all access +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 2: Vertex INSERT Policies (WITH CHECK) - CREATE +-- ============================================================================ + +-- Allow SELECT for all (so we can verify results) +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +-- 2.1: Basic WITH CHECK - users can only insert rows they own +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); + +-- Test as rls_user1 - should succeed (owner matches current_user) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Created', owner: 'rls_user1', department: 'Test', level: 1}) +$$) AS (a agtype); + +-- Test as rls_user1 - should FAIL (owner doesn't match current_user) +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Fake', owner: 'rls_user2', department: 'Test', level: 1}) +$$) AS (a agtype); + +RESET ROLE; + +-- Verify only User1Created was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- 2.2: Default deny for INSERT - no INSERT policy blocks all inserts +DROP POLICY person_insert_own ON rls_graph."Person"; + +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'ShouldFail', owner: 'rls_user1', department: 'Blocked', level: 1}) +$$) AS (a agtype); +RESET ROLE; + +-- Verify nothing was created in Blocked department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Blocked' RETURN p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 3: Vertex UPDATE Policies - SET +-- ============================================================================ + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +-- 3.1: USING clause only - filter which rows can be updated +CREATE POLICY person_update_using ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user); + +SET ROLE rls_user1; + +-- Should succeed - rls_user1 owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + +-- Should silently skip - rls_user1 doesn't own Bob (USING filters it out) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + +RESET ROLE; + +-- Verify Alice was updated, Bob was not +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] RETURN p.name, p.updated ORDER BY p.name +$$) AS (name agtype, updated agtype); + +-- 3.2: WITH CHECK clause - validate new values +DROP POLICY person_update_using ON rls_graph."Person"; + +CREATE POLICY person_update_check ON rls_graph."Person" + FOR UPDATE + USING (true) -- Can update any row + WITH CHECK (properties->>'"owner"' = current_user); -- But new value must keep owner + +SET ROLE rls_user1; + +-- Should succeed - modifying property but keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.verified = true RETURN p.name, p.verified +$$) AS (name agtype, verified agtype); + +-- Should FAIL - trying to change owner to someone else +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.owner = 'rls_user2' RETURN p.owner +$$) AS (owner agtype); + +RESET ROLE; + +-- Verify owner wasn't changed +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.owner +$$) AS (owner agtype); + +-- 3.3: Both USING and WITH CHECK together +DROP POLICY person_update_check ON rls_graph."Person"; + +CREATE POLICY person_update_both ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); + +SET ROLE rls_user1; + +-- Should succeed - owns Alice, keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + +-- Should silently skip - doesn't own Bob (USING filters) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 4: Vertex UPDATE Policies - REMOVE +-- ============================================================================ + +-- Keep existing update policy, test REMOVE operation + +SET ROLE rls_user1; + +-- Should succeed - owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) REMOVE p.status RETURN p.name, p.status +$$) AS (name agtype, status agtype); + +-- Should silently skip - doesn't own Bob +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) REMOVE p.department RETURN p.name, p.department +$$) AS (name agtype, dept agtype); + +RESET ROLE; + +-- Verify Bob still has department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) RETURN p.department +$$) AS (dept agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_update_both ON rls_graph."Person"; + +-- ============================================================================ +-- PART 5: Vertex DELETE Policies +-- ============================================================================ + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +-- Create test data for delete tests +CREATE POLICY person_insert_all ON rls_graph."Person" + FOR INSERT WITH CHECK (true); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest1', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest2', owner: 'rls_user2', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest3', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + +DROP POLICY person_insert_all ON rls_graph."Person"; + +-- 5.1: Basic USING filtering for DELETE +CREATE POLICY person_delete_own ON rls_graph."Person" + FOR DELETE + USING (properties->>'"owner"' = current_user); + +SET ROLE rls_user1; + +-- Should succeed - owns DeleteTest1 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest1'}) DELETE p +$$) AS (a agtype); + +-- Should silently skip - doesn't own DeleteTest2 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest2'}) DELETE p +$$) AS (a agtype); + +RESET ROLE; + +-- Verify DeleteTest1 deleted, DeleteTest2 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- 5.2: Default deny for DELETE - no policy blocks all deletes +DROP POLICY person_delete_own ON rls_graph."Person"; + +SET ROLE rls_user1; + +-- Should silently skip - no DELETE policy means default deny +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) DELETE p +$$) AS (a agtype); + +RESET ROLE; + +-- Verify DeleteTest3 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) RETURN p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 6: MERGE Policies +-- ============================================================================ + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); + +-- 6.1: MERGE creating new vertex - INSERT policy applies +SET ROLE rls_user1; + +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew1', owner: 'rls_user1', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); + +-- Should FAIL - creating with wrong owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew2', owner: 'rls_user2', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 6.2: MERGE matching existing - only SELECT needed +SET ROLE rls_user1; + +-- Should succeed - Alice exists and SELECT allowed +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'Alice'}) + RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + +RESET ROLE; + +-- Verify only MergeNew1 was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_insert_own ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 7: Edge SELECT Policies +-- ============================================================================ + +-- Disable vertex RLS, enable edge RLS +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" FORCE ROW LEVEL SECURITY; + +-- Policy: Only see edges from 2021 or later +CREATE POLICY knows_select_recent ON rls_graph."KNOWS" + FOR SELECT + USING ((properties->>'"since"')::int >= 2021); + +SET ROLE rls_user1; + +-- Should only see 2021 and 2022 edges (not 2020) +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() RETURN k.since ORDER BY k.since +$$) AS (since agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 8: Edge INSERT Policies (CREATE edge) +-- ============================================================================ + +DROP POLICY knows_select_recent ON rls_graph."KNOWS"; + +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); + +-- Policy: Can only create edges with strength = 'strong' +CREATE POLICY knows_insert_strong ON rls_graph."KNOWS" + FOR INSERT + WITH CHECK (properties->>'"strength"' = 'strong'); + +SET ROLE rls_user1; + +-- Should succeed - strength is 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'strong'}]->(b) +$$) AS (a agtype); + +-- Should FAIL - strength is 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'weak'}]->(b) +$$) AS (a agtype); + +RESET ROLE; + +-- Verify only strong edge was created +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since = 2023 RETURN k.strength ORDER BY k.strength +$$) AS (strength agtype); + +-- cleanup +DROP POLICY knows_insert_strong ON rls_graph."KNOWS"; + +-- ============================================================================ +-- PART 9: Edge UPDATE Policies (SET on edge) +-- ============================================================================ + +-- Policy: Can only update edges with strength = 'strong' +CREATE POLICY knows_update_strong ON rls_graph."KNOWS" + FOR UPDATE + USING (properties->>'"strength"' = 'strong') + WITH CHECK (properties->>'"strength"' = 'strong'); + +SET ROLE rls_user1; + +-- Should succeed - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2021}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + +-- Should silently skip - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2020}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + +RESET ROLE; + +-- Verify only 2021 edge was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2020, 2021] RETURN k.since, k.notes ORDER BY k.since +$$) AS (since agtype, notes agtype); + +-- cleanup +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_update_strong ON rls_graph."KNOWS"; + +-- ============================================================================ +-- PART 10: Edge DELETE Policies +-- ============================================================================ + +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); + +-- Create test edges for delete +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2018, strength: 'weak'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2019, strength: 'strong'}]->(b) +$$) AS (a agtype); + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; + +-- Policy: Can only delete edges with strength = 'weak' +CREATE POLICY knows_delete_weak ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' = 'weak'); + +SET ROLE rls_user1; + +-- Should succeed - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2018}]->() DELETE k +$$) AS (a agtype); + +-- Should silently skip - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2019}]->() DELETE k +$$) AS (a agtype); + +RESET ROLE; + +-- Verify 2018 edge deleted, 2019 edge still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2018, 2019] RETURN k.since ORDER BY k.since +$$) AS (since agtype); + +-- cleanup +DROP POLICY knows_delete_weak ON rls_graph."KNOWS"; + +-- ============================================================================ +-- PART 11: DETACH DELETE +-- ============================================================================ + +-- Re-enable Person RLS +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +CREATE POLICY person_all ON rls_graph."Person" + FOR ALL USING (true) WITH CHECK (true); + +-- Create test data with a protected edge +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest1', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest2', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'DetachTest1'}), (b:Person {name: 'DetachTest2'}) + CREATE (a)-[:KNOWS {since: 2010, strength: 'protected'}]->(b) +$$) AS (a agtype); + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; + +-- Policy: Cannot delete edges with strength = 'protected' +CREATE POLICY knows_delete_not_protected ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' != 'protected'); + +SET ROLE rls_user1; + +-- Should ERROR - DETACH DELETE cannot silently skip (would leave dangling edge) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) DETACH DELETE p +$$) AS (a agtype); + +RESET ROLE; + +-- Verify vertex still exists (delete was blocked) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) RETURN p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_all ON rls_graph."Person"; +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_delete_not_protected ON rls_graph."KNOWS"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Detach' DETACH DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 12: Multiple Labels in Single Query +-- ============================================================================ + +-- Enable RLS on Document too +ALTER TABLE rls_graph."Document" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" FORCE ROW LEVEL SECURITY; + +-- Policy: Users see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); + +-- Policy: Users see only public documents +CREATE POLICY doc_public ON rls_graph."Document" + FOR SELECT + USING (properties->>'"classification"' = 'public'); + +SET ROLE rls_user1; + +-- Should only see Alice and Charlie (Person) with Public Doc (Document) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (d:Document) RETURN d.title ORDER BY d.title +$$) AS (title agtype); + +-- Combined query - should respect both policies +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person)-[:AUTHORED]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 13: Permissive vs Restrictive Policies +-- ============================================================================ + +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY doc_public ON rls_graph."Document"; + +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; + +-- 13.1: Multiple permissive policies (OR logic) +CREATE POLICY person_permissive_own ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"owner"' = current_user); + +CREATE POLICY person_permissive_eng ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"department"' = 'Engineering'); + +SET ROLE rls_user1; + +-- Should see: Alice (own), Charlie (own), Bob (Engineering) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department IN ['Engineering', 'Sales'] + RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 13.2: Add restrictive policy (AND with permissive) +CREATE POLICY person_restrictive_level ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING ((properties->>'"level"')::int <= 2); + +SET ROLE rls_user1; + +-- Should see: Alice (own, level 1), Bob (Engineering, level 2), Charlie (own, level 1) +-- Diana (level 3) blocked by restrictive +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name, p.level ORDER BY p.name +$$) AS (name agtype, level agtype); + +RESET ROLE; + +-- 13.3: Multiple restrictive policies (all must pass) +CREATE POLICY person_restrictive_sales ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING (properties->>'"department"' != 'Sales'); + +SET ROLE rls_user1; + +-- Should see: Alice (own, level 1, not Sales), Bob (Engineering, level 2, not Sales) +-- Charlie blocked by Sales restriction +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 14: BYPASSRLS Role and Superuser Behavior +-- ============================================================================ + +DROP POLICY person_permissive_own ON rls_graph."Person"; +DROP POLICY person_permissive_eng ON rls_graph."Person"; +DROP POLICY person_restrictive_level ON rls_graph."Person"; +DROP POLICY person_restrictive_sales ON rls_graph."Person"; + +-- Restrictive policy that blocks most access +CREATE POLICY person_very_restrictive ON rls_graph."Person" + FOR SELECT + USING (properties->>'"name"' = 'Nobody'); + +-- 14.1: Regular user sees nothing +SET ROLE rls_user1; + +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 14.2: BYPASSRLS role sees everything +SET ROLE rls_admin; + +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 14.3: Superuser sees everything (implicit bypass) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- ============================================================================ +-- PART 15: Complex Multi-Operation Queries +-- ============================================================================ + +DROP POLICY person_very_restrictive ON rls_graph."Person"; + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); + +CREATE POLICY person_update_own ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); + +-- 15.1: MATCH + CREATE in one query +SET ROLE rls_user1; + +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS]->(:Person {name: 'NewFromMatch', owner: 'rls_user1', department: 'Complex', level: 1}) +$$) AS (a agtype); + +RESET ROLE; + +-- Verify creation +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'NewFromMatch'}) RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + +-- 15.2: MATCH + SET in one query +SET ROLE rls_user1; + +-- Should succeed on Alice (own), skip Bob (not own) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + SET p.complexTest = true + RETURN p.name, p.complexTest +$$) AS (name agtype, test agtype); + +RESET ROLE; + +-- Verify only Alice was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + RETURN p.name, p.complexTest ORDER BY p.name +$$) AS (name agtype, test agtype); + +-- cleanup +DROP POLICY IF EXISTS person_select_all ON rls_graph."Person"; +DROP POLICY IF EXISTS person_insert_own ON rls_graph."Person"; +DROP POLICY IF EXISTS person_update_own ON rls_graph."Person"; + +-- ============================================================================ +-- PART 16: startNode/endNode RLS Enforcement +-- ============================================================================ + +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; + +-- Enable RLS on Person with restrictive policy +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; + +-- Policy: users can only see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); + +-- Enable edge access for testing +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +CREATE POLICY knows_all ON rls_graph."KNOWS" + FOR SELECT USING (true); + +-- 16.1: startNode blocked by RLS - should error +SET ROLE rls_user1; + +-- rls_user1 can see the edge (Alice->Bob) but cannot see Bob (owned by rls_user2) +-- endNode should error because Bob is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(b) + RETURN endNode(e) +$$) AS (end_vertex agtype); + +-- 16.2: endNode blocked by RLS - should error +-- rls_user1 cannot see Bob, so startNode on an edge starting from Bob should error +SET ROLE rls_user2; + +-- rls_user2 can see Bob but not Alice (owned by rls_user1) +-- startNode should error because Alice is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a)-[e:KNOWS]->(b:Person {name: 'Bob'}) + RETURN startNode(e) +$$) AS (start_vertex agtype); + +-- 16.3: startNode/endNode succeed when RLS allows access +SET ROLE rls_user1; + +-- Alice->Charlie edge: rls_user1 owns both, should succeed +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(c:Person {name: 'Charlie'}) + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + +RESET ROLE; + +-- cleanup +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY knows_all ON rls_graph."KNOWS"; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; + +-- ============================================================================ +-- RLS CLEANUP +-- ============================================================================ + +RESET ROLE; + +-- Disable RLS on all tables +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; + +-- Drop roles +DROP OWNED BY rls_user1 CASCADE; +DROP ROLE rls_user1; + +DROP OWNED BY rls_user2 CASCADE; +DROP ROLE rls_user2; + +DROP OWNED BY rls_admin CASCADE; +DROP ROLE rls_admin; + +-- Drop test graph +SELECT drop_graph('rls_graph', true); diff --git a/src/backend/executor/cypher_create.c b/src/backend/executor/cypher_create.c index 2091ea29c..a90c2a196 100644 --- a/src/backend/executor/cypher_create.c +++ b/src/backend/executor/cypher_create.c @@ -19,6 +19,8 @@ #include "postgres.h" +#include "utils/rls.h" + #include "catalog/ag_label.h" #include "executor/cypher_executor.h" #include "executor/cypher_utils.h" @@ -120,6 +122,12 @@ static void begin_cypher_create(CustomScanState *node, EState *estate, cypher_node->prop_expr_state = ExecInitExpr(cypher_node->prop_expr, (PlanState *)node); } + + /* Setup RLS WITH CHECK policies if RLS is enabled */ + if (check_enable_rls(rel->rd_id, InvalidOid, true) == RLS_ENABLED) + { + setup_wcos(cypher_node->resultRelInfo, estate, node, CMD_INSERT); + } } } diff --git a/src/backend/executor/cypher_delete.c b/src/backend/executor/cypher_delete.c index 6bb869833..d58513535 100644 --- a/src/backend/executor/cypher_delete.c +++ b/src/backend/executor/cypher_delete.c @@ -19,8 +19,11 @@ #include "postgres.h" -#include "storage/bufmgr.h" #include "common/hashfn.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "utils/acl.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "executor/cypher_executor.h" @@ -366,6 +369,16 @@ static void process_delete_list(CustomScanState *node) ExprContext *econtext = css->css.ss.ps.ps_ExprContext; TupleTableSlot *scanTupleSlot = econtext->ecxt_scantuple; EState *estate = node->ss.ps.state; + HTAB *qual_cache = NULL; + HASHCTL hashctl; + + /* Hash table for caching compiled security quals per label */ + MemSet(&hashctl, 0, sizeof(hashctl)); + hashctl.keysize = sizeof(Oid); + hashctl.entrysize = sizeof(RLSCacheEntry); + hashctl.hcxt = CurrentMemoryContext; + qual_cache = hash_create("delete_qual_cache", 8, &hashctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); foreach(lc, css->delete_data->delete_items) { @@ -378,6 +391,7 @@ static void process_delete_list(CustomScanState *node) char *label_name; Integer *pos; int entity_position; + Oid relid; item = lfirst(lc); @@ -396,6 +410,7 @@ static void process_delete_list(CustomScanState *node) label_name = pnstrdup(label->val.string.val, label->val.string.len); resultRelInfo = create_entity_result_rel_info(estate, css->delete_data->graph_name, label_name); + relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); /* * Setup the scan key to require the id field on-disc to match the @@ -443,6 +458,36 @@ static void process_delete_list(CustomScanState *node) continue; } + /* Check RLS security quals (USING policy) before delete */ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + RLSCacheEntry *entry; + bool found; + + /* Get cached security quals and slot for this label */ + entry = hash_search(qual_cache, &relid, HASH_ENTER, &found); + if (!found) + { + entry->qualExprs = setup_security_quals(resultRelInfo, estate, + node, CMD_DELETE); + entry->slot = ExecInitExtraTupleSlot( + estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), + &TTSOpsHeapTuple); + entry->withCheckOptions = NIL; + entry->withCheckOptionExprs = NIL; + } + + ExecStoreHeapTuple(heap_tuple, entry->slot, false); + + /* Silently skip if USING policy filters out this row */ + if (!check_security_quals(entry->qualExprs, entry->slot, econtext)) + { + table_endscan(scan_desc); + destroy_entity_result_rel_info(resultRelInfo); + continue; + } + } + /* * For vertices, we insert the vertex ID in the hashtable * vertex_id_htab. This hashtable is used later to process @@ -462,6 +507,9 @@ static void process_delete_list(CustomScanState *node) table_endscan(scan_desc); destroy_entity_result_rel_info(resultRelInfo); } + + /* Clean up the cache */ + hash_destroy(qual_cache); } /* @@ -485,9 +533,14 @@ static void check_for_connected_edges(CustomScanState *node) TableScanDesc scan_desc; HeapTuple tuple; TupleTableSlot *slot; + Oid relid; + bool rls_enabled = false; + List *qualExprs = NIL; + ExprContext *econtext = NULL; resultRelInfo = create_entity_result_rel_info(estate, graph_name, label_name); + relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); estate->es_snapshot->curcid = GetCurrentCommandId(false); estate->es_output_cid = GetCurrentCommandId(false); scan_desc = table_beginscan(resultRelInfo->ri_RelationDesc, @@ -496,6 +549,22 @@ static void check_for_connected_edges(CustomScanState *node) estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), &TTSOpsHeapTuple); + /* + * For DETACH DELETE with RLS enabled, compile the security qual + * expressions once per label for efficient evaluation. + */ + if (css->delete_data->detach) + { + /* Setup RLS security quals for this label */ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + rls_enabled = true; + econtext = css->css.ss.ps.ps_ExprContext; + qualExprs = setup_security_quals(resultRelInfo, estate, node, + CMD_DELETE); + } + } + /* for each row */ while (true) { @@ -533,6 +602,34 @@ static void check_for_connected_edges(CustomScanState *node) { if (css->delete_data->detach) { + AclResult aclresult; + + /* Check that the user has DELETE permission on the edge table */ + aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_DELETE); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, OBJECT_TABLE, label_name); + } + + /* Check RLS security quals (USING policy) before delete */ + if (rls_enabled) + { + /* + * For DETACH DELETE, error out if edge RLS check fails. + * Unlike normal DELETE which silently skips, we cannot + * silently skip edges here as it would leave dangling + * edges pointing to deleted vertices. + */ + if (!check_security_quals(qualExprs, slot, econtext)) + { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("cannot delete edge due to row-level security policy on \"%s\"", + label_name), + errhint("DETACH DELETE requires permission to delete all connected edges."))); + } + } + delete_entity(estate, resultRelInfo, tuple); } else diff --git a/src/backend/executor/cypher_merge.c b/src/backend/executor/cypher_merge.c index 9136825ab..e0d6c78e8 100644 --- a/src/backend/executor/cypher_merge.c +++ b/src/backend/executor/cypher_merge.c @@ -19,10 +19,12 @@ #include "postgres.h" +#include "utils/datum.h" +#include "utils/rls.h" + #include "catalog/ag_label.h" #include "executor/cypher_executor.h" #include "executor/cypher_utils.h" -#include "utils/datum.h" /* * The following structure is used to hold a single vertex or edge component @@ -180,6 +182,12 @@ static void begin_cypher_merge(CustomScanState *node, EState *estate, cypher_node->prop_expr_state = ExecInitExpr(cypher_node->prop_expr, (PlanState *)node); } + + /* Setup RLS WITH CHECK policies if RLS is enabled */ + if (check_enable_rls(rel->rd_id, InvalidOid, true) == RLS_ENABLED) + { + setup_wcos(cypher_node->resultRelInfo, estate, node, CMD_INSERT); + } } /* diff --git a/src/backend/executor/cypher_set.c b/src/backend/executor/cypher_set.c index d1837fb16..40cf2b232 100644 --- a/src/backend/executor/cypher_set.c +++ b/src/backend/executor/cypher_set.c @@ -19,7 +19,10 @@ #include "postgres.h" +#include "common/hashfn.h" +#include "executor/executor.h" #include "storage/bufmgr.h" +#include "utils/rls.h" #include "executor/cypher_executor.h" #include "executor/cypher_utils.h" @@ -125,6 +128,13 @@ static HeapTuple update_entity_tuple(ResultRelInfo *resultRelInfo, ExecConstraints(resultRelInfo, elemTupleSlot, estate); } + /* Check RLS WITH CHECK policies if configured */ + if (resultRelInfo->ri_WithCheckOptions != NIL) + { + ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK, resultRelInfo, + elemTupleSlot, estate); + } + result = table_tuple_update(resultRelInfo->ri_RelationDesc, &tuple->t_self, elemTupleSlot, cid, estate->es_snapshot, @@ -355,9 +365,20 @@ static void process_update_list(CustomScanState *node) EState *estate = css->css.ss.ps.state; int *luindex = NULL; int lidx = 0; + HTAB *qual_cache = NULL; + HASHCTL hashctl; /* allocate an array to hold the last update index of each 'entity' */ luindex = palloc0(sizeof(int) * scanTupleSlot->tts_nvalid); + + /* Hash table for caching compiled security quals per label */ + MemSet(&hashctl, 0, sizeof(hashctl)); + hashctl.keysize = sizeof(Oid); + hashctl.entrysize = sizeof(RLSCacheEntry); + hashctl.hcxt = CurrentMemoryContext; + qual_cache = hash_create("update_qual_cache", 8, &hashctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + /* * Iterate through the SET items list and store the loop index of each * 'entity' update. As there is only one entry for each entity, this will @@ -505,6 +526,38 @@ static void process_update_list(CustomScanState *node) estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), &TTSOpsHeapTuple); + /* Setup RLS policies if RLS is enabled */ + if (check_enable_rls(resultRelInfo->ri_RelationDesc->rd_id, + InvalidOid, true) == RLS_ENABLED) + { + Oid relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + RLSCacheEntry *entry; + bool found; + + /* Get cached RLS state for this label, or set it up */ + entry = hash_search(qual_cache, &relid, HASH_ENTER, &found); + if (!found) + { + /* Setup WITH CHECK policies */ + setup_wcos(resultRelInfo, estate, node, CMD_UPDATE); + entry->withCheckOptions = resultRelInfo->ri_WithCheckOptions; + entry->withCheckOptionExprs = resultRelInfo->ri_WithCheckOptionExprs; + + /* Setup security quals */ + entry->qualExprs = setup_security_quals(resultRelInfo, estate, + node, CMD_UPDATE); + entry->slot = ExecInitExtraTupleSlot( + estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), + &TTSOpsHeapTuple); + } + else + { + /* Use cached WCOs */ + resultRelInfo->ri_WithCheckOptions = entry->withCheckOptions; + resultRelInfo->ri_WithCheckOptionExprs = entry->withCheckOptionExprs; + } + } + /* * Now that we have the updated properties, create a either a vertex or * edge Datum for the in-memory update, and setup the tupleTableSlot @@ -580,8 +633,36 @@ static void process_update_list(CustomScanState *node) */ if (HeapTupleIsValid(heap_tuple)) { - heap_tuple = update_entity_tuple(resultRelInfo, slot, estate, - heap_tuple); + bool should_update = true; + Oid relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + + /* Check RLS security quals (USING policy) before update */ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + RLSCacheEntry *entry; + + /* Entry was already created earlier when setting up WCOs */ + entry = hash_search(qual_cache, &relid, HASH_FIND, NULL); + if (!entry) + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("missing RLS cache entry for relation %u", + relid))); + } + + ExecStoreHeapTuple(heap_tuple, entry->slot, false); + should_update = check_security_quals(entry->qualExprs, + entry->slot, + econtext); + } + + /* Silently skip if USING policy filters out this row */ + if (should_update) + { + heap_tuple = update_entity_tuple(resultRelInfo, slot, estate, + heap_tuple); + } } /* close the ScanDescription */ table_endscan(scan_desc); @@ -595,6 +676,10 @@ static void process_update_list(CustomScanState *node) /* increment loop index */ lidx++; } + + /* Clean up the cache */ + hash_destroy(qual_cache); + /* free our lookup array */ pfree_if_not_null(luindex); } diff --git a/src/backend/executor/cypher_utils.c b/src/backend/executor/cypher_utils.c index c8d568831..eff829925 100644 --- a/src/backend/executor/cypher_utils.c +++ b/src/backend/executor/cypher_utils.c @@ -24,14 +24,36 @@ #include "postgres.h" +#include "executor/executor.h" +#include "miscadmin.h" #include "nodes/makefuncs.h" #include "parser/parse_relation.h" +#include "rewrite/rewriteManip.h" +#include "rewrite/rowsecurity.h" +#include "utils/acl.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "commands/label_commands.h" #include "executor/cypher_utils.h" #include "utils/ag_cache.h" +/* RLS helper function declarations */ +static void get_policies_for_relation(Relation relation, CmdType cmd, + Oid user_id, List **permissive_policies, + List **restrictive_policies); +static void add_with_check_options(Relation rel, int rt_index, WCOKind kind, + List *permissive_policies, + List *restrictive_policies, + List **withCheckOptions, bool *hasSubLinks, + bool force_using); +static void add_security_quals(int rt_index, List *permissive_policies, + List *restrictive_policies, + List **securityQuals, bool *hasSubLinks); +static void sort_policies_by_name(List *policies); +static int row_security_policy_cmp(const ListCell *a, const ListCell *b); +static bool check_role_for_policy(ArrayType *policy_roles, Oid user_id); + /* * Given the graph name and the label name, create a ResultRelInfo for the table * those two variables represent. Open the Indices too. @@ -255,6 +277,13 @@ HeapTuple insert_entity_tuple_cid(ResultRelInfo *resultRelInfo, ExecConstraints(resultRelInfo, elemTupleSlot, estate); } + /* Check RLS WITH CHECK policies if configured */ + if (resultRelInfo->ri_WithCheckOptions != NIL) + { + ExecWithCheckOptions(WCO_RLS_INSERT_CHECK, resultRelInfo, + elemTupleSlot, estate); + } + /* Insert the tuple normally */ table_tuple_insert(resultRelInfo->ri_RelationDesc, elemTupleSlot, cid, 0, NULL); @@ -268,3 +297,754 @@ HeapTuple insert_entity_tuple_cid(ResultRelInfo *resultRelInfo, return tuple; } + +/* + * setup_wcos + * + * WithCheckOptions are added during the rewrite phase, but since AGE uses + * CMD_SELECT for all queries, WCOs don't get added for CREATE/SET/MERGE + * operations. This function compensates by adding WCOs at execution time. + * + * Based on PostgreSQL's row security implementation in rowsecurity.c + */ +void setup_wcos(ResultRelInfo *resultRelInfo, EState *estate, + CustomScanState *node, CmdType cmd) +{ + List *permissive_policies; + List *restrictive_policies; + List *withCheckOptions = NIL; + List *wcoExprs = NIL; + ListCell *lc; + Relation rel; + Oid user_id; + int rt_index; + WCOKind wco_kind; + bool hasSubLinks = false; + + /* Determine the WCO kind based on command type */ + if (cmd == CMD_INSERT) + { + wco_kind = WCO_RLS_INSERT_CHECK; + } + else if (cmd == CMD_UPDATE) + { + wco_kind = WCO_RLS_UPDATE_CHECK; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg_internal("unexpected command type for setup_wcos"))); + } + + rel = resultRelInfo->ri_RelationDesc; + + /* + * Use rt_index=1 since we're evaluating policies against a single relation. + * Policy quals are stored with varno=1, and we set ecxt_scantuple to the + * tuple we want to check, so keeping varno=1 is correct. + */ + rt_index = 1; + user_id = GetUserId(); + + /* Get the policies for the specified command type */ + get_policies_for_relation(rel, cmd, user_id, + &permissive_policies, + &restrictive_policies); + + /* Build WithCheckOptions from the policies */ + add_with_check_options(rel, rt_index, wco_kind, + permissive_policies, + restrictive_policies, + &withCheckOptions, + &hasSubLinks, + false); + + /* Compile the WCO expressions */ + foreach(lc, withCheckOptions) + { + WithCheckOption *wco = lfirst_node(WithCheckOption, lc); + ExprState *wcoExpr; + + /* Ensure qual is a List for ExecInitQual */ + if (!IsA(wco->qual, List)) + { + wco->qual = (Node *) list_make1(wco->qual); + } + + wcoExpr = ExecInitQual((List *) wco->qual, (PlanState *) node); + wcoExprs = lappend(wcoExprs, wcoExpr); + } + + /* Set up the ResultRelInfo with WCOs */ + resultRelInfo->ri_WithCheckOptions = withCheckOptions; + resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; +} + +/* + * get_policies_for_relation + * + * Returns lists of permissive and restrictive policies to be applied to the + * specified relation, based on the command type and role. + * + * This includes any policies added by extensions. + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static void +get_policies_for_relation(Relation relation, CmdType cmd, Oid user_id, + List **permissive_policies, + List **restrictive_policies) +{ + ListCell *item; + + *permissive_policies = NIL; + *restrictive_policies = NIL; + + /* No policies if RLS descriptor is not present */ + if (relation->rd_rsdesc == NULL) + { + return; + } + + /* First find all internal policies for the relation. */ + foreach(item, relation->rd_rsdesc->policies) + { + bool cmd_matches = false; + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + + /* Always add ALL policies, if they exist. */ + if (policy->polcmd == '*') + { + cmd_matches = true; + } + else + { + /* Check whether the policy applies to the specified command type */ + switch (cmd) + { + case CMD_SELECT: + if (policy->polcmd == ACL_SELECT_CHR) + { + cmd_matches = true; + } + break; + case CMD_INSERT: + if (policy->polcmd == ACL_INSERT_CHR) + { + cmd_matches = true; + } + break; + case CMD_UPDATE: + if (policy->polcmd == ACL_UPDATE_CHR) + { + cmd_matches = true; + } + break; + case CMD_DELETE: + if (policy->polcmd == ACL_DELETE_CHR) + { + cmd_matches = true; + } + break; + case CMD_MERGE: + /* + * We do not support a separate policy for MERGE command. + * Instead it derives from the policies defined for other + * commands. + */ + break; + default: + elog(ERROR, "unrecognized policy command type %d", + (int) cmd); + break; + } + } + + /* + * Add this policy to the relevant list of policies if it applies to + * the specified role. + */ + if (cmd_matches && check_role_for_policy(policy->roles, user_id)) + { + if (policy->permissive) + { + *permissive_policies = lappend(*permissive_policies, policy); + } + else + { + *restrictive_policies = lappend(*restrictive_policies, policy); + } + } + } + + /* + * We sort restrictive policies by name so that any WCOs they generate are + * checked in a well-defined order. + */ + sort_policies_by_name(*restrictive_policies); + + /* + * Then add any permissive or restrictive policies defined by extensions. + * These are simply appended to the lists of internal policies, if they + * apply to the specified role. + */ + if (row_security_policy_hook_restrictive) + { + List *hook_policies = + (*row_security_policy_hook_restrictive) (cmd, relation); + + /* + * As with built-in restrictive policies, we sort any hook-provided + * restrictive policies by name also. Note that we also intentionally + * always check all built-in restrictive policies, in name order, + * before checking restrictive policies added by hooks, in name order. + */ + sort_policies_by_name(hook_policies); + + foreach(item, hook_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + + if (check_role_for_policy(policy->roles, user_id)) + { + *restrictive_policies = lappend(*restrictive_policies, policy); + } + } + } + + if (row_security_policy_hook_permissive) + { + List *hook_policies = + (*row_security_policy_hook_permissive) (cmd, relation); + + foreach(item, hook_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + + if (check_role_for_policy(policy->roles, user_id)) + { + *permissive_policies = lappend(*permissive_policies, policy); + } + } + } +} + +/* + * add_with_check_options + * + * Add WithCheckOptions of the specified kind to check that new records + * added by an INSERT or UPDATE are consistent with the specified RLS + * policies. Normally new data must satisfy the WITH CHECK clauses from the + * policies. If a policy has no explicit WITH CHECK clause, its USING clause + * is used instead. In the special case of an UPDATE arising from an + * INSERT ... ON CONFLICT DO UPDATE, existing records are first checked using + * a WCO_RLS_CONFLICT_CHECK WithCheckOption, which always uses the USING + * clauses from RLS policies. + * + * New WCOs are added to withCheckOptions, and hasSubLinks is set to true if + * any of the check clauses added contain sublink subqueries. + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static void +add_with_check_options(Relation rel, + int rt_index, + WCOKind kind, + List *permissive_policies, + List *restrictive_policies, + List **withCheckOptions, + bool *hasSubLinks, + bool force_using) +{ + ListCell *item; + List *permissive_quals = NIL; + +#define QUAL_FOR_WCO(policy) \ + ( !force_using && \ + (policy)->with_check_qual != NULL ? \ + (policy)->with_check_qual : (policy)->qual ) + + /* + * First collect up the permissive policy clauses, similar to + * add_security_quals. + */ + foreach(item, permissive_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + Expr *qual = QUAL_FOR_WCO(policy); + + if (qual != NULL) + { + permissive_quals = lappend(permissive_quals, copyObject(qual)); + *hasSubLinks |= policy->hassublinks; + } + } + + /* + * There must be at least one permissive qual found or no rows are allowed + * to be added. This is the same as in add_security_quals. + * + * If there are no permissive_quals then we fall through and return a + * single 'false' WCO, preventing all new rows. + */ + if (permissive_quals != NIL) + { + /* + * Add a single WithCheckOption for all the permissive policy clauses, + * combining them together using OR. This check has no policy name, + * since if the check fails it means that no policy granted permission + * to perform the update, rather than any particular policy being + * violated. + */ + WithCheckOption *wco; + + wco = makeNode(WithCheckOption); + wco->kind = kind; + wco->relname = pstrdup(RelationGetRelationName(rel)); + wco->polname = NULL; + wco->cascaded = false; + + if (list_length(permissive_quals) == 1) + { + wco->qual = (Node *) linitial(permissive_quals); + } + else + { + wco->qual = (Node *) makeBoolExpr(OR_EXPR, permissive_quals, -1); + } + + ChangeVarNodes(wco->qual, 1, rt_index, 0); + + *withCheckOptions = list_append_unique(*withCheckOptions, wco); + + /* + * Now add WithCheckOptions for each of the restrictive policy clauses + * (which will be combined together using AND). We use a separate + * WithCheckOption for each restrictive policy to allow the policy + * name to be included in error reports if the policy is violated. + */ + foreach(item, restrictive_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + Expr *qual = QUAL_FOR_WCO(policy); + + if (qual != NULL) + { + qual = copyObject(qual); + ChangeVarNodes((Node *) qual, 1, rt_index, 0); + + wco = makeNode(WithCheckOption); + wco->kind = kind; + wco->relname = pstrdup(RelationGetRelationName(rel)); + wco->polname = pstrdup(policy->policy_name); + wco->qual = (Node *) qual; + wco->cascaded = false; + + *withCheckOptions = list_append_unique(*withCheckOptions, wco); + *hasSubLinks |= policy->hassublinks; + } + } + } + else + { + /* + * If there were no policy clauses to check new data, add a single + * always-false WCO (a default-deny policy). + */ + WithCheckOption *wco; + + wco = makeNode(WithCheckOption); + wco->kind = kind; + wco->relname = pstrdup(RelationGetRelationName(rel)); + wco->polname = NULL; + wco->qual = (Node *) makeConst(BOOLOID, -1, InvalidOid, + sizeof(bool), BoolGetDatum(false), + false, true); + wco->cascaded = false; + + *withCheckOptions = lappend(*withCheckOptions, wco); + } +} + +/* + * sort_policies_by_name + * + * This is only used for restrictive policies, ensuring that any + * WithCheckOptions they generate are applied in a well-defined order. + * This is not necessary for permissive policies, since they are all combined + * together using OR into a single WithCheckOption check. + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static void +sort_policies_by_name(List *policies) +{ + list_sort(policies, row_security_policy_cmp); +} + +/* + * list_sort comparator to sort RowSecurityPolicy entries by name + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static int +row_security_policy_cmp(const ListCell *a, const ListCell *b) +{ + const RowSecurityPolicy *pa = (const RowSecurityPolicy *) lfirst(a); + const RowSecurityPolicy *pb = (const RowSecurityPolicy *) lfirst(b); + + /* Guard against NULL policy names from extensions */ + if (pa->policy_name == NULL) + { + return pb->policy_name == NULL ? 0 : 1; + } + if (pb->policy_name == NULL) + { + return -1; + } + + return strcmp(pa->policy_name, pb->policy_name); +} + +/* + * check_role_for_policy - + * determines if the policy should be applied for the current role + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static bool +check_role_for_policy(ArrayType *policy_roles, Oid user_id) +{ + int i; + Oid *roles = (Oid *) ARR_DATA_PTR(policy_roles); + + /* Quick fall-thru for policies applied to all roles */ + if (roles[0] == ACL_ID_PUBLIC) + { + return true; + } + + for (i = 0; i < ARR_DIMS(policy_roles)[0]; i++) + { + if (has_privs_of_role(user_id, roles[i])) + { + return true; + } + } + + return false; +} + +/* + * add_security_quals + * + * Add security quals to enforce the specified RLS policies, restricting + * access to existing data in a table. If there are no policies controlling + * access to the table, then all access is prohibited --- i.e., an implicit + * default-deny policy is used. + * + * New security quals are added to securityQuals, and hasSubLinks is set to + * true if any of the quals added contain sublink subqueries. + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static void +add_security_quals(int rt_index, + List *permissive_policies, + List *restrictive_policies, + List **securityQuals, + bool *hasSubLinks) +{ + ListCell *item; + List *permissive_quals = NIL; + Expr *rowsec_expr; + + /* + * First collect up the permissive quals. If we do not find any + * permissive policies then no rows are visible (this is handled below). + */ + foreach(item, permissive_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + + if (policy->qual != NULL) + { + permissive_quals = lappend(permissive_quals, + copyObject(policy->qual)); + *hasSubLinks |= policy->hassublinks; + } + } + + /* + * We must have permissive quals, always, or no rows are visible. + * + * If we do not, then we simply return a single 'false' qual which results + * in no rows being visible. + */ + if (permissive_quals != NIL) + { + /* + * We now know that permissive policies exist, so we can now add + * security quals based on the USING clauses from the restrictive + * policies. Since these need to be combined together using AND, we + * can just add them one at a time. + */ + foreach(item, restrictive_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + Expr *qual; + + if (policy->qual != NULL) + { + qual = copyObject(policy->qual); + ChangeVarNodes((Node *) qual, 1, rt_index, 0); + + *securityQuals = list_append_unique(*securityQuals, qual); + *hasSubLinks |= policy->hassublinks; + } + } + + /* + * Then add a single security qual combining together the USING + * clauses from all the permissive policies using OR. + */ + if (list_length(permissive_quals) == 1) + { + rowsec_expr = (Expr *) linitial(permissive_quals); + } + else + { + rowsec_expr = makeBoolExpr(OR_EXPR, permissive_quals, -1); + } + + ChangeVarNodes((Node *) rowsec_expr, 1, rt_index, 0); + *securityQuals = list_append_unique(*securityQuals, rowsec_expr); + } + else + { + /* + * A permissive policy must exist for rows to be visible at all. + * Therefore, if there were no permissive policies found, return a + * single always-false clause. + */ + *securityQuals = lappend(*securityQuals, + makeConst(BOOLOID, -1, InvalidOid, + sizeof(bool), BoolGetDatum(false), + false, true)); + } +} + +/* + * setup_security_quals + * + * Security quals (USING policies) are added during the rewrite phase, but + * since AGE uses CMD_SELECT for all queries, they don't get added for + * UPDATE/DELETE operations. This function sets up security quals at + * execution time to be evaluated against each tuple before modification. + * + * Returns a list of compiled ExprState for the security quals. + */ +List * +setup_security_quals(ResultRelInfo *resultRelInfo, EState *estate, + CustomScanState *node, CmdType cmd) +{ + List *permissive_policies; + List *restrictive_policies; + List *securityQuals = NIL; + List *qualExprs = NIL; + ListCell *lc; + Relation rel; + Oid user_id; + int rt_index; + bool hasSubLinks = false; + + /* Only UPDATE and DELETE have security quals */ + if (cmd != CMD_UPDATE && cmd != CMD_DELETE) + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg_internal("unexpected command type for setup_security_quals"))); + } + + rel = resultRelInfo->ri_RelationDesc; + + /* If no RLS policies exist, return empty list */ + if (rel->rd_rsdesc == NULL) + { + return NIL; + } + + /* + * Use rt_index=1 since we're evaluating policies against a single relation. + * Policy quals are stored with varno=1, and we set ecxt_scantuple to the + * tuple we want to check, so keeping varno=1 is correct. + */ + rt_index = 1; + user_id = GetUserId(); + + /* Get the policies for the specified command type */ + get_policies_for_relation(rel, cmd, user_id, + &permissive_policies, + &restrictive_policies); + + /* Build security quals from the policies */ + add_security_quals(rt_index, permissive_policies, restrictive_policies, + &securityQuals, &hasSubLinks); + + /* Compile the security qual expressions */ + foreach(lc, securityQuals) + { + Expr *qual = (Expr *) lfirst(lc); + ExprState *qualExpr; + + /* Ensure qual is a List for ExecInitQual */ + if (!IsA(qual, List)) + { + qual = (Expr *) list_make1(qual); + } + + qualExpr = ExecInitQual((List *) qual, (PlanState *) node); + qualExprs = lappend(qualExprs, qualExpr); + } + + return qualExprs; +} + +/* + * check_security_quals + * + * Evaluate security quals against a tuple. Returns true if all quals pass + * (row can be modified), false if any qual fails (row should be silently + * skipped). + * + * This matches PostgreSQL's behavior where USING expressions for UPDATE/DELETE + * silently filter rows rather than raising errors. + */ +bool +check_security_quals(List *qualExprs, TupleTableSlot *slot, + ExprContext *econtext) +{ + ListCell *lc; + TupleTableSlot *saved_scantuple; + bool result = true; + + if (qualExprs == NIL) + { + return true; + } + + /* Save and set up the scan tuple for expression evaluation */ + saved_scantuple = econtext->ecxt_scantuple; + econtext->ecxt_scantuple = slot; + + foreach(lc, qualExprs) + { + ExprState *qualExpr = (ExprState *) lfirst(lc); + + if (!ExecQual(qualExpr, econtext)) + { + result = false; + break; + } + } + + econtext->ecxt_scantuple = saved_scantuple; + return result; +} + +/* + * check_rls_for_tuple + * + * Check RLS policies for a tuple without needing full executor context. + * Used by standalone functions like startNode()/endNode() that access + * tables directly. + * + * Returns true if the tuple passes RLS checks (or if RLS is not enabled), + * false if the tuple should be filtered out. + */ +bool +check_rls_for_tuple(Relation rel, HeapTuple tuple, CmdType cmd) +{ + List *permissive_policies; + List *restrictive_policies; + List *securityQuals = NIL; + ListCell *lc; + Oid user_id; + bool hasSubLinks = false; + bool result = true; + EState *estate; + ExprContext *econtext; + TupleTableSlot *slot; + + /* If RLS is not enabled, tuple passes */ + if (check_enable_rls(RelationGetRelid(rel), InvalidOid, true) != RLS_ENABLED) + { + return true; + } + + /* If no RLS policies exist on the relation, tuple passes */ + if (rel->rd_rsdesc == NULL) + { + return true; + } + + /* Get the policies for the specified command type */ + user_id = GetUserId(); + get_policies_for_relation(rel, cmd, user_id, + &permissive_policies, + &restrictive_policies); + + /* Build security quals from the policies (use rt_index=1) */ + add_security_quals(1, permissive_policies, restrictive_policies, + &securityQuals, &hasSubLinks); + + /* If no quals, tuple passes */ + if (securityQuals == NIL) + { + return true; + } + + /* Create minimal execution environment */ + estate = CreateExecutorState(); + econtext = CreateExprContext(estate); + + /* Create tuple slot and store the tuple */ + slot = MakeSingleTupleTableSlot(RelationGetDescr(rel), &TTSOpsHeapTuple); + ExecStoreHeapTuple(tuple, slot, false); + econtext->ecxt_scantuple = slot; + + /* Compile and evaluate each qual */ + foreach(lc, securityQuals) + { + Expr *qual = (Expr *) lfirst(lc); + ExprState *qualExpr; + List *qualList; + + /* ExecPrepareQual expects a List */ + if (!IsA(qual, List)) + { + qualList = list_make1(qual); + } + else + { + qualList = (List *) qual; + } + + /* Use ExecPrepareQual for standalone expression evaluation */ + qualExpr = ExecPrepareQual(qualList, estate); + + if (!ExecQual(qualExpr, econtext)) + { + result = false; + break; + } + } + + /* Clean up */ + ExecDropSingleTupleTableSlot(slot); + FreeExprContext(econtext, true); + FreeExecutorState(estate); + + return result; +} diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 214fdaa49..1f2ab79ba 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -346,6 +346,100 @@ static bool isa_special_VLE_case(cypher_path *path); static ParseNamespaceItem *find_pnsi(cypher_parsestate *cpstate, char *varname); static bool has_list_comp_or_subquery(Node *expr, void *context); +/* + * Add required permissions to the RTEPermissionInfo for a relation. + * Recursively searches through RTEs including subqueries. + */ +static bool +add_rte_permissions_recurse(List *rtable, List *rteperminfos, + Oid relid, AclMode permissions) +{ + ListCell *lc; + + /* First check the perminfos at this level */ + foreach(lc, rteperminfos) + { + RTEPermissionInfo *perminfo = lfirst(lc); + + if (perminfo->relid == relid) + { + perminfo->requiredPerms |= permissions; + return true; + } + } + + /* Then recurse into subqueries */ + foreach(lc, rtable) + { + RangeTblEntry *rte = lfirst(lc); + + if (rte->rtekind == RTE_SUBQUERY && rte->subquery != NULL) + { + if (add_rte_permissions_recurse(rte->subquery->rtable, + rte->subquery->rteperminfos, + relid, permissions)) + { + return true; + } + } + } + + return false; +} + +/* + * Add required permissions to the RTEPermissionInfo for a relation. + * Searches through p_rteperminfos and subqueries for a matching relOid + * and adds the specified permissions to requiredPerms. + */ +static void +add_rte_permissions(ParseState *pstate, Oid relid, AclMode permissions) +{ + add_rte_permissions_recurse(pstate->p_rtable, pstate->p_rteperminfos, + relid, permissions); +} + +/* + * Add required permissions to the label table for a given entity variable. + * Looks up the entity by variable name, extracts its label, and adds + * the specified permissions to the corresponding RTEPermissionInfo. + */ +static void +add_entity_permissions(cypher_parsestate *cpstate, char *var_name, + AclMode permissions) +{ + ParseState *pstate = (ParseState *)cpstate; + transform_entity *entity; + char *label = NULL; + Oid relid; + + entity = find_variable(cpstate, var_name); + if (entity == NULL) + { + return; + } + + if (entity->type == ENT_VERTEX) + { + label = entity->entity.node->label; + } + else if (entity->type == ENT_EDGE) + { + label = entity->entity.rel->label; + } + + if (label == NULL) + { + return; + } + + relid = get_label_relation(label, cpstate->graph_oid); + if (OidIsValid(relid)) + { + add_rte_permissions(pstate, relid, permissions); + } +} + /* * transform a cypher_clause */ @@ -1556,6 +1650,9 @@ static List *transform_cypher_delete_item_list(cypher_parsestate *cpstate, parser_errposition(pstate, col->location))); } + /* Add ACL_DELETE permission to the entity's label table */ + add_entity_permissions(cpstate, val->sval, ACL_DELETE); + add_volatile_wrapper_to_target_entry(query->targetList, resno); pos = makeInteger(resno); @@ -1721,6 +1818,9 @@ cypher_update_information *transform_cypher_remove_item_list( parser_errposition(pstate, set_item->location))); } + /* Add ACL_UPDATE permission to the entity's label table */ + add_entity_permissions(cpstate, variable_name, ACL_UPDATE); + add_volatile_wrapper_to_target_entry(query->targetList, item->entity_position); @@ -1898,6 +1998,9 @@ cypher_update_information *transform_cypher_set_item_list( parser_errposition(pstate, set_item->location))); } + /* Add ACL_UPDATE permission to the entity's label table */ + add_entity_permissions(cpstate, variable_name, ACL_UPDATE); + add_volatile_wrapper_to_target_entry(query->targetList, item->entity_position); diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index 44d82997f..f190cfffe 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -44,7 +44,10 @@ #include "libpq/pqformat.h" #include "miscadmin.h" #include "parser/parse_coerce.h" +#include "nodes/nodes.h" +#include "utils/acl.h" #include "utils/builtins.h" +#include "executor/cypher_utils.h" #include "utils/float.h" #include "utils/lsyscache.h" #include "utils/snapmgr.h" @@ -5620,15 +5623,24 @@ static Datum get_vertex(const char *graph, const char *vertex_label, HeapTuple tuple; TupleDesc tupdesc; Datum id, properties, result; + AclResult aclresult; /* get the specific graph namespace (schema) */ Oid graph_namespace_oid = get_namespace_oid(graph, false); /* get the specific vertex label table (schema.vertex_label) */ Oid vertex_label_table_oid = get_relname_relid(vertex_label, - graph_namespace_oid); + graph_namespace_oid); /* get the active snapshot */ Snapshot snapshot = GetActiveSnapshot(); + /* check for SELECT permission on the table */ + aclresult = pg_class_aclcheck(vertex_label_table_oid, GetUserId(), + ACL_SELECT); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, OBJECT_TABLE, vertex_label); + } + /* initialize the scan key */ ScanKeyInit(&scan_keys[0], 1, BTEqualStrategyNumber, F_OIDEQ, Int64GetDatum(graphid)); @@ -5641,11 +5653,24 @@ static Datum get_vertex(const char *graph, const char *vertex_label, /* bail if the tuple isn't valid */ if (!HeapTupleIsValid(tuple)) { + table_endscan(scan_desc); + table_close(graph_vertex_label, ShareLock); ereport(ERROR, (errcode(ERRCODE_UNDEFINED_TABLE), errmsg("graphid %lu does not exist", graphid))); } + /* Check RLS policies - error if filtered out */ + if (!check_rls_for_tuple(graph_vertex_label, tuple, CMD_SELECT)) + { + table_endscan(scan_desc); + table_close(graph_vertex_label, ShareLock); + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("access to vertex %lu denied by row-level security policy on \"%s\"", + graphid, vertex_label))); + } + /* get the tupdesc - we don't need to release this one */ tupdesc = RelationGetDescr(graph_vertex_label); /* bail if the number of columns differs */ diff --git a/src/include/executor/cypher_utils.h b/src/include/executor/cypher_utils.h index 0798f153c..fc4067455 100644 --- a/src/include/executor/cypher_utils.h +++ b/src/include/executor/cypher_utils.h @@ -21,6 +21,7 @@ #define AG_CYPHER_UTILS_H #include "access/heapam.h" +#include "nodes/execnodes.h" #include "nodes/cypher_nodes.h" #include "utils/agtype.h" @@ -127,4 +128,25 @@ HeapTuple insert_entity_tuple_cid(ResultRelInfo *resultRelInfo, TupleTableSlot *elemTupleSlot, EState *estate, CommandId cid); +/* RLS support */ +void setup_wcos(ResultRelInfo *resultRelInfo, EState *estate, + CustomScanState *node, CmdType cmd); +List *setup_security_quals(ResultRelInfo *resultRelInfo, EState *estate, + CustomScanState *node, CmdType cmd); +bool check_security_quals(List *qualExprs, TupleTableSlot *slot, + ExprContext *econtext); +bool check_rls_for_tuple(Relation rel, HeapTuple tuple, CmdType cmd); + +/* Hash table entry for caching RLS state per label */ +typedef struct RLSCacheEntry +{ + Oid relid; /* hash key */ + /* Security quals (USING policies) for UPDATE/DELETE */ + List *qualExprs; + TupleTableSlot *slot; /* slot for old tuple (RLS check) */ + /* WCOs - used only in SET */ + List *withCheckOptions; + List *withCheckOptionExprs; +} RLSCacheEntry; + #endif From b4503ad97fc9c6b84a6ef75734a7e4395e97d98e Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Thu, 22 Jan 2026 21:50:23 +0500 Subject: [PATCH 20/20] Fix upgrade script for 1.6.0 to 1.7.0 (#2320) - Added index creation for existing labels Assisted-by AI --- age--1.6.0--y.y.y.sql | 97 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/age--1.6.0--y.y.y.sql b/age--1.6.0--y.y.y.sql index 8c58bb3eb..0ccf64f28 100644 --- a/age--1.6.0--y.y.y.sql +++ b/age--1.6.0--y.y.y.sql @@ -53,3 +53,100 @@ CREATE FUNCTION ag_catalog._ag_enforce_edge_uniqueness4(graphid, graphid, graphi STABLE PARALLEL SAFE as 'MODULE_PATHNAME'; + +-- Create indexes on id columns for existing labels +-- Vertex labels get PRIMARY KEY on id, Edge labels get indexes on start_id/end_id +DO $$ +DECLARE + label_rec record; + schema_name text; + table_name text; + idx_exists boolean; + pk_exists boolean; + idx_name text; +BEGIN + FOR label_rec IN + SELECT l.relation, l.kind + FROM ag_catalog.ag_label l + LOOP + SELECT n.nspname, c.relname INTO schema_name, table_name + FROM pg_class c + JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE c.oid = label_rec.relation; + + IF label_rec.kind = 'e' THEN + -- Edge: check/create index on start_id + SELECT EXISTS ( + SELECT 1 FROM pg_index i + JOIN pg_class c ON c.oid = i.indexrelid + JOIN pg_am am ON am.oid = c.relam + JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = i.indkey[0] + WHERE i.indrelid = label_rec.relation + AND a.attname = 'start_id' + AND i.indpred IS NULL -- not a partial index + AND i.indexprs IS NULL -- not an expression index + AND am.amname = 'btree' -- btree access method + ) INTO idx_exists; + + IF NOT idx_exists THEN + EXECUTE format('CREATE INDEX %I ON %I.%I USING btree (start_id)', + table_name || '_start_id_idx', schema_name, table_name); + END IF; + + -- Edge: check/create index on end_id + SELECT EXISTS ( + SELECT 1 FROM pg_index i + JOIN pg_class c ON c.oid = i.indexrelid + JOIN pg_am am ON am.oid = c.relam + JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = i.indkey[0] + WHERE i.indrelid = label_rec.relation + AND a.attname = 'end_id' + AND i.indpred IS NULL -- not a partial index + AND i.indexprs IS NULL -- not an expression index + AND am.amname = 'btree' -- btree access method + ) INTO idx_exists; + + IF NOT idx_exists THEN + EXECUTE format('CREATE INDEX %I ON %I.%I USING btree (end_id)', + table_name || '_end_id_idx', schema_name, table_name); + END IF; + ELSE + -- Vertex: check/create PRIMARY KEY on id + SELECT EXISTS ( + SELECT 1 FROM pg_constraint + WHERE conrelid = label_rec.relation AND contype = 'p' + ) INTO pk_exists; + + IF NOT pk_exists THEN + -- Check if a usable unique index on id already exists + SELECT c.relname INTO idx_name + FROM pg_index i + JOIN pg_class c ON c.oid = i.indexrelid + JOIN pg_am am ON am.oid = c.relam + WHERE i.indrelid = label_rec.relation + AND i.indisunique = true + AND i.indpred IS NULL -- not a partial index + AND i.indexprs IS NULL -- not an expression index + AND am.amname = 'btree' -- btree access method + AND i.indnkeyatts = 1 -- single column index + AND EXISTS ( + SELECT 1 FROM pg_attribute a + WHERE a.attrelid = i.indrelid + AND a.attnum = i.indkey[0] + AND a.attname = 'id' + ) + LIMIT 1; + + IF idx_name IS NOT NULL THEN + -- Reuse existing unique index for primary key + EXECUTE format('ALTER TABLE %I.%I ADD CONSTRAINT %I PRIMARY KEY USING INDEX %I', + schema_name, table_name, table_name || '_pkey', idx_name); + ELSE + -- Create new primary key + EXECUTE format('ALTER TABLE %I.%I ADD PRIMARY KEY (id)', + schema_name, table_name); + END IF; + END IF; + END IF; + END LOOP; +END $$;