Skip to content

Commit f0ed4c9

Browse files
committed
Fix MERGE ON CREATE/MATCH SET crash when RHS references a bound variable
When MERGE has a previous clause (e.g. MATCH, UNWIND), transform_cypher_merge takes the lateral-left-join path via transform_merge_make_lateral_join. That helper called addRangeTableEntryForJoin with nscolumns=NULL, leaving the join ParseNamespaceItem's p_nscolumns unset. For queries that did not subsequently resolve a column reference against that nsitem (e.g. RETURN, which runs in a fresh namespace built by handle_prev_clause), the NULL was harmless. Our ON CREATE / ON MATCH SET transform runs in-line, before the MERGE query becomes a subquery, so transform_cypher_set_item_list consulted the join's nsitem directly. colNameToVar -> scanNSItemForColumn then dereferenced p_nscolumns[attnum-1] = NULL[0] and the backend segfaulted on any ON SET whose RHS referenced a bound variable. Reported by MuhammadTahaNaveed on PR #2347. Populate the join's p_nscolumns from res_colvars. The Var we end up producing for a bound entity lives inside prop_expr, which is opaque to the planner, so it is not rewritten to match the plan's output slots. At ExecEvalScalarVar time only varattno is consulted, and scantuple's layout mirrors the join's eref->colnames (via make_target_list_from_join). Use the join rtindex and 1-based eref position so scantuple[varattno - 1] resolves to the correct entity column at runtime; without this, Vars for a (varno=l_rte) and b (varno=r_rte) with varattno=1 both hit scantuple[0] and b.id evaluated to a.id. Also initialise apply_update_list's new_property_value at its declaration. All control paths reach the single alter_property_value call with the variable set, but -Wmaybe-uninitialized fires at -O2 because the compiler cannot prove remove_property == isnull when prop_expr is non-NULL. Regression: six new cases in cypher_merge covering outer-ref RHS, self-ref RHS with a previous clause (the silent-wrong-value variant we initially missed), UNWIND-driven MERGE with self-ref (Muhammad's second reproducer), multi-item ON CREATE SET mixing reference kinds, and ON MATCH SET with a variable RHS on the match-branch second run. Cassert installcheck: 33/33.
1 parent 39a2cc8 commit f0ed4c9

4 files changed

Lines changed: 176 additions & 5 deletions

File tree

regress/expected/cypher_merge.out

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2160,6 +2160,77 @@ $$) AS (name agtype);
21602160
"test"
21612161
(1 row)
21622162

2163+
-- Issue #2347: RHS of ON CREATE / ON MATCH SET referencing a bound
2164+
-- variable crashed the backend when MERGE had a previous clause, because
2165+
-- the lateral-join's ParseNamespaceItem had p_nscolumns=NULL.
2166+
-- ON CREATE SET with RHS referencing the outer MATCH's variable
2167+
SELECT * FROM cypher('merge_actions', $$ CREATE (:Person {name:'Anchor'}) $$) AS (a agtype);
2168+
a
2169+
---
2170+
(0 rows)
2171+
2172+
SELECT * FROM cypher('merge_actions', $$
2173+
MATCH (a:Person {name: 'Anchor'})
2174+
MERGE (b:Person {name: 'FromOuter'})
2175+
ON CREATE SET b.source_name = a.name
2176+
RETURN a.name, b.name, b.source_name
2177+
$$) AS (a_name agtype, b_name agtype, b_source agtype);
2178+
a_name | b_name | b_source
2179+
----------+-------------+----------
2180+
"Anchor" | "FromOuter" | "Anchor"
2181+
(1 row)
2182+
2183+
-- ON CREATE SET with RHS referencing the MERGE-bound variable itself
2184+
SELECT * FROM cypher('merge_actions', $$
2185+
MATCH (a:Person {name: 'Anchor'})
2186+
MERGE (b:Person {name: 'SelfRef'})
2187+
ON CREATE SET b.echo_name = b.name
2188+
RETURN b.name, b.echo_name
2189+
$$) AS (b_name agtype, b_echo agtype);
2190+
b_name | b_echo
2191+
-----------+-----------
2192+
"SelfRef" | "SelfRef"
2193+
(1 row)
2194+
2195+
-- ON CREATE SET driven by UNWIND with self-reference on the RHS
2196+
-- (Muhammad's second reproducer)
2197+
SELECT * FROM cypher('merge_actions', $$
2198+
UNWIND ['U1', 'U2'] AS nm
2199+
MERGE (n:Person {name: nm})
2200+
ON CREATE SET n.copy_name = n.name
2201+
RETURN n.name, n.copy_name
2202+
$$) AS (n_name agtype, n_copy agtype);
2203+
n_name | n_copy
2204+
--------+--------
2205+
"U1" | "U1"
2206+
"U2" | "U2"
2207+
(2 rows)
2208+
2209+
-- Multiple SET items mixing outer-ref, self-ref, and literal RHS
2210+
SELECT * FROM cypher('merge_actions', $$
2211+
MATCH (a:Person {name: 'Anchor'})
2212+
MERGE (b:Person {name: 'MultiItem'})
2213+
ON CREATE SET b.from_a = a.name, b.self = b.name, b.lit = 'literal'
2214+
RETURN b.from_a, b.self, b.lit
2215+
$$) AS (fa agtype, sf agtype, lit agtype);
2216+
fa | sf | lit
2217+
----------+-------------+-----------
2218+
"Anchor" | "MultiItem" | "literal"
2219+
(1 row)
2220+
2221+
-- ON MATCH SET with variable RHS (second run on existing node)
2222+
SELECT * FROM cypher('merge_actions', $$
2223+
MATCH (a:Person {name: 'Anchor'})
2224+
MERGE (b:Person {name: 'FromOuter'})
2225+
ON CREATE SET b.source_name = a.name
2226+
ON MATCH SET b.last_seen_by = a.name
2227+
RETURN b.source_name, b.last_seen_by
2228+
$$) AS (src agtype, last agtype);
2229+
src | last
2230+
----------+----------
2231+
"Anchor" | "Anchor"
2232+
(1 row)
2233+
21632234
-- cleanup
21642235
SELECT * FROM cypher('merge_actions', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype);
21652236
a

regress/sql/cypher_merge.sql

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,53 @@ SELECT * FROM cypher('merge_actions', $$
10381038
RETURN n.name
10391039
$$) AS (name agtype);
10401040

1041+
-- Issue #2347: RHS of ON CREATE / ON MATCH SET referencing a bound
1042+
-- variable crashed the backend when MERGE had a previous clause, because
1043+
-- the lateral-join's ParseNamespaceItem had p_nscolumns=NULL.
1044+
1045+
-- ON CREATE SET with RHS referencing the outer MATCH's variable
1046+
SELECT * FROM cypher('merge_actions', $$ CREATE (:Person {name:'Anchor'}) $$) AS (a agtype);
1047+
SELECT * FROM cypher('merge_actions', $$
1048+
MATCH (a:Person {name: 'Anchor'})
1049+
MERGE (b:Person {name: 'FromOuter'})
1050+
ON CREATE SET b.source_name = a.name
1051+
RETURN a.name, b.name, b.source_name
1052+
$$) AS (a_name agtype, b_name agtype, b_source agtype);
1053+
1054+
-- ON CREATE SET with RHS referencing the MERGE-bound variable itself
1055+
SELECT * FROM cypher('merge_actions', $$
1056+
MATCH (a:Person {name: 'Anchor'})
1057+
MERGE (b:Person {name: 'SelfRef'})
1058+
ON CREATE SET b.echo_name = b.name
1059+
RETURN b.name, b.echo_name
1060+
$$) AS (b_name agtype, b_echo agtype);
1061+
1062+
-- ON CREATE SET driven by UNWIND with self-reference on the RHS
1063+
-- (Muhammad's second reproducer)
1064+
SELECT * FROM cypher('merge_actions', $$
1065+
UNWIND ['U1', 'U2'] AS nm
1066+
MERGE (n:Person {name: nm})
1067+
ON CREATE SET n.copy_name = n.name
1068+
RETURN n.name, n.copy_name
1069+
$$) AS (n_name agtype, n_copy agtype);
1070+
1071+
-- Multiple SET items mixing outer-ref, self-ref, and literal RHS
1072+
SELECT * FROM cypher('merge_actions', $$
1073+
MATCH (a:Person {name: 'Anchor'})
1074+
MERGE (b:Person {name: 'MultiItem'})
1075+
ON CREATE SET b.from_a = a.name, b.self = b.name, b.lit = 'literal'
1076+
RETURN b.from_a, b.self, b.lit
1077+
$$) AS (fa agtype, sf agtype, lit agtype);
1078+
1079+
-- ON MATCH SET with variable RHS (second run on existing node)
1080+
SELECT * FROM cypher('merge_actions', $$
1081+
MATCH (a:Person {name: 'Anchor'})
1082+
MERGE (b:Person {name: 'FromOuter'})
1083+
ON CREATE SET b.source_name = a.name
1084+
ON MATCH SET b.last_seen_by = a.name
1085+
RETURN b.source_name, b.last_seen_by
1086+
$$) AS (src agtype, last agtype);
1087+
10411088
-- cleanup
10421089
SELECT * FROM cypher('merge_actions', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype);
10431090

src/backend/executor/cypher_set.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ void apply_update_list(CustomScanState *node,
440440
agtype_value *id;
441441
agtype_value *label;
442442
agtype *original_entity;
443-
agtype *new_property_value;
443+
agtype *new_property_value = NULL;
444444
TupleTableSlot *slot;
445445
ResultRelInfo *resultRelInfo;
446446
ScanKeyData scan_keys[1];

src/backend/parser/cypher_clause.c

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7509,10 +7509,63 @@ transform_merge_make_lateral_join(cypher_parsestate *cpstate, Query *query,
75097509
*/
75107510
get_res_cols(pstate, l_nsitem, r_nsitem, &res_colnames, &res_colvars);
75117511

7512-
/* make the RTE for the join */
7513-
jnsitem = addRangeTableEntryForJoin(pstate, res_colnames, NULL, j->jointype,
7514-
0, res_colvars, NIL, NIL, j->alias,
7515-
NULL, true);
7512+
/*
7513+
* Build a ParseNamespaceColumn array for the join RTE so that
7514+
* subsequent name lookups (e.g. transform_cypher_set_item_list for an
7515+
* ON CREATE SET / ON MATCH SET expression) can resolve references to
7516+
* variables bound in the prev clause or the MERGE's path via
7517+
* colNameToVar → scanNSItemForColumn, which dereferences
7518+
* nsitem->p_nscolumns. Passing NULL here left p_nscolumns unset and
7519+
* caused a segfault whenever an ON SET item's RHS referenced a bound
7520+
* variable (issue #2347).
7521+
*
7522+
* Each column's nscolumn references the join RTE (via its rtindex) with
7523+
* p_varattno = position in res_colnames. This matches the scantuple
7524+
* layout that apply_update_list sees at execution time: the join's
7525+
* target list (built by make_target_list_from_join below) iterates
7526+
* eref->colnames in order, so scantuple[i-1] corresponds to the i-th
7527+
* entry in eref->colnames. Using the underlying RTE's varno/varattno
7528+
* would be semantically equivalent for planner-rewritten Vars in the
7529+
* query tree, but the Vars we produce here end up inside prop_expr --
7530+
* opaque metadata the planner does not walk -- so they stay un-remapped
7531+
* and must index the scantuple layout directly.
7532+
*
7533+
* addRangeTableEntryForJoin appends the new RTE to pstate->p_rtable, so
7534+
* its rtindex is list_length(p_rtable) + 1 at this point.
7535+
*/
7536+
{
7537+
int colcount = list_length(res_colvars);
7538+
int join_rtindex = list_length(pstate->p_rtable) + 1;
7539+
ParseNamespaceColumn *nscolumns;
7540+
ListCell *lvar;
7541+
int col_idx = 0;
7542+
7543+
nscolumns = (ParseNamespaceColumn *)
7544+
palloc0(colcount * sizeof(ParseNamespaceColumn));
7545+
7546+
foreach (lvar, res_colvars)
7547+
{
7548+
Var *v = (Var *) lfirst(lvar);
7549+
7550+
/* res_colvars is populated by get_res_cols via expandRTE */
7551+
Assert(IsA(v, Var));
7552+
7553+
nscolumns[col_idx].p_varno = join_rtindex;
7554+
nscolumns[col_idx].p_varattno = col_idx + 1;
7555+
nscolumns[col_idx].p_vartype = v->vartype;
7556+
nscolumns[col_idx].p_vartypmod = v->vartypmod;
7557+
nscolumns[col_idx].p_varcollid = v->varcollid;
7558+
nscolumns[col_idx].p_varnosyn = join_rtindex;
7559+
nscolumns[col_idx].p_varattnosyn = col_idx + 1;
7560+
col_idx++;
7561+
}
7562+
7563+
/* make the RTE for the join */
7564+
jnsitem = addRangeTableEntryForJoin(pstate, res_colnames, nscolumns,
7565+
j->jointype, 0, res_colvars,
7566+
NIL, NIL, j->alias, NULL, true);
7567+
Assert(jnsitem->p_rtindex == join_rtindex);
7568+
}
75167569

75177570
j->rtindex = jnsitem->p_rtindex;
75187571

0 commit comments

Comments
 (0)