Skip to content

Commit 67a7334

Browse files
committed
feat(cypher): string functions + fail loudly on unsupported syntax
Add single-argument string functions to projections — size, length, trim, ltrim, rtrim, reverse — via the generic function recognizer. More importantly, change the engine to FAIL LOUDLY on unsupported syntax instead of silently projecting an empty column. An unknown function call (e.g. split(...), coalesce(...)) or list indexing/slicing in RETURN/WITH now returns a clear "unsupported function '<name>' (supported: ...)" error rather than a valid-looking but blank result — the same silent-empty failure mode that hid the labels() bug. This supersedes the #373 graceful-resync behaviour (its test is flipped to assert the error; a new test covers an unknown function in RETURN). Decision: full Cypher Tier 3 (lists/maps/paths/comprehensions/params — a value data-model rewrite) is intentionally NOT implemented; clear errors on unsupported features are the higher-value, lower-risk completeness win for the read subset agents actually use. Refs: Cypher read suite (tiers 1-2a + unsupported-syntax hardening)
1 parent 71a6c57 commit 67a7334

3 files changed

Lines changed: 119 additions & 41 deletions

File tree

scripts/smoke-test.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,13 @@ case "$TIV" in
248248
*) echo "OK: query_graph toInteger(f.start_line) = $TIV" ;;
249249
esac
250250

251+
# size() string-length function in projection
252+
SZV=$(cyp_first_cell 'MATCH (f:Function) RETURN size(f.name) AS s LIMIT 1')
253+
case "$SZV" in
254+
''|*[!0-9]*) echo "FAIL: query_graph size(f.name) returned non-integer '$SZV'"; exit 1 ;;
255+
*) echo "OK: query_graph size(f.name) = $SZV" ;;
256+
esac
257+
251258
# =~ regex match in WHERE
252259
CYPHER_RX=$(cli query_graph "{\"project\":\"$PROJECT\",\"query\":\"MATCH (f:Function) WHERE f.name =~ \\\".+\\\" RETURN f.name\"}")
253260
RX_ROWS=$(echo "$CYPHER_RX" | python3 -c "import json,sys; d=json.loads(sys.stdin.read()); print(len(d.get('rows',[])))" 2>/dev/null || echo "0")

src/cypher/cypher.c

Lines changed: 68 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,8 +1264,9 @@ static bool cyp_ci_eq(const char *a, const char *b) {
12641264
* casts toInteger/toFloat/toBoolean — or NULL if unrecognised (case-insensitive).
12651265
* toLower/toUpper/toString are separate keyword tokens handled elsewhere. */
12661266
static const char *scalar_func_canonical(const char *s) {
1267-
static const char *const names[] = {"labels", "type", "id", "keys", "properties",
1268-
"toInteger", "toFloat", "toBoolean", NULL};
1267+
static const char *const names[] = {
1268+
"labels", "type", "id", "keys", "properties", "toInteger", "toFloat", "toBoolean",
1269+
"size", "length", "trim", "ltrim", "rtrim", "reverse", NULL};
12691270
for (int i = 0; names[i]; i++) {
12701271
if (cyp_ci_eq(s, names[i])) {
12711272
return names[i];
@@ -1274,6 +1275,16 @@ static const char *scalar_func_canonical(const char *s) {
12741275
return NULL;
12751276
}
12761277

1278+
/* True for single-argument functions that transform a scalar string value
1279+
* (vs. entity-introspection funcs that act on the bound node/edge). */
1280+
static bool is_scalar_value_func(const char *f) {
1281+
return f && (strcmp(f, "toLower") == 0 || strcmp(f, "toUpper") == 0 ||
1282+
strcmp(f, "toString") == 0 || strcmp(f, "toInteger") == 0 ||
1283+
strcmp(f, "toFloat") == 0 || strcmp(f, "toBoolean") == 0 ||
1284+
strcmp(f, "size") == 0 || strcmp(f, "length") == 0 || strcmp(f, "trim") == 0 ||
1285+
strcmp(f, "ltrim") == 0 || strcmp(f, "rtrim") == 0 || strcmp(f, "reverse") == 0);
1286+
}
1287+
12771288
static int parse_var_dot_prop(parser_t *p, cbm_return_item_t *item) {
12781289
const cbm_token_t *var = expect(p, TOK_IDENT);
12791290
if (!var) {
@@ -1365,28 +1376,26 @@ static int parse_return_item(parser_t *p, cbm_return_item_t *item) {
13651376
if (rc < 0) {
13661377
return CBM_NOT_FOUND;
13671378
}
1368-
/* An unknown function call / indexed expression after a bare identifier
1369-
* (e.g. split(f.path,'/')[0]) is not evaluated, but its balanced
1370-
* parentheses/brackets MUST be consumed so the parser stays in sync. Left
1371-
* unconsumed, the trailing tokens desynced the parser into a misleading
1372-
* default star projection (wrong columns, blank rows) (#373). The column
1373-
* keeps its alias and simply projects empty. */
1374-
if (!item->func && !item->kase) {
1375-
while (check(p, TOK_LPAREN) || check(p, TOK_LBRACKET)) {
1376-
cbm_token_type_t open = peek(p)->type;
1377-
cbm_token_type_t close = (open == TOK_LPAREN) ? TOK_RPAREN : TOK_RBRACKET;
1378-
advance(p);
1379-
int depth = 1;
1380-
while (depth > 0 && !check(p, TOK_EOF)) {
1381-
cbm_token_type_t t = peek(p)->type;
1382-
if (t == open) {
1383-
depth++;
1384-
} else if (t == close) {
1385-
depth--;
1386-
}
1387-
advance(p);
1388-
}
1379+
/* A bare identifier followed by '(' is a function we don't recognise
1380+
* (recognised aggregates / string funcs / scalar funcs are handled above),
1381+
* and '[' begins list indexing/slicing we don't support. Rather than
1382+
* silently projecting an empty column — which looks like a valid but blank
1383+
* result and hides the real problem — fail loudly with a clear message so
1384+
* the caller knows the query used an unsupported feature (#373). */
1385+
if (!item->func && !item->kase && (check(p, TOK_LPAREN) || check(p, TOK_LBRACKET))) {
1386+
if (check(p, TOK_LPAREN)) {
1387+
snprintf(p->error, sizeof(p->error),
1388+
"unsupported function '%s' (supported: count, sum, avg, min, max, collect, "
1389+
"toLower, toUpper, toString, toInteger, toFloat, toBoolean, size, length, "
1390+
"trim, ltrim, rtrim, reverse, labels, type, id, keys, properties)",
1391+
item->variable ? item->variable : "?");
1392+
} else {
1393+
snprintf(p->error, sizeof(p->error),
1394+
"unsupported expression: list indexing/slicing '[...]' is not supported");
13891395
}
1396+
safe_str_free(&item->variable);
1397+
safe_str_free(&item->property);
1398+
return CBM_NOT_FOUND;
13901399
}
13911400
/* Optional AS alias */
13921401
if (match(p, TOK_AS)) {
@@ -2427,6 +2436,41 @@ static const char *apply_string_func(const char *func, const char *val, char *bu
24272436
}
24282437
return ""; /* not a boolean → null */
24292438
}
2439+
if (strcmp(func, "size") == 0 || strcmp(func, "length") == 0) {
2440+
snprintf(buf, buf_sz, "%zu", strlen(val));
2441+
return buf;
2442+
}
2443+
if (strcmp(func, "trim") == 0 || strcmp(func, "ltrim") == 0 || strcmp(func, "rtrim") == 0) {
2444+
bool do_left = (strcmp(func, "trim") == 0 || strcmp(func, "ltrim") == 0);
2445+
bool do_right = (strcmp(func, "trim") == 0 || strcmp(func, "rtrim") == 0);
2446+
const char *start = val;
2447+
const char *end = val + strlen(val);
2448+
while (do_left && (*start == ' ' || *start == '\t' || *start == '\n' || *start == '\r')) {
2449+
start++;
2450+
}
2451+
while (do_right && end > start &&
2452+
(end[-1] == ' ' || end[-1] == '\t' || end[-1] == '\n' || end[-1] == '\r')) {
2453+
end--;
2454+
}
2455+
size_t n = (size_t)(end - start);
2456+
if (n >= buf_sz) {
2457+
n = buf_sz - SKIP_ONE;
2458+
}
2459+
memcpy(buf, start, n);
2460+
buf[n] = '\0';
2461+
return buf;
2462+
}
2463+
if (strcmp(func, "reverse") == 0) {
2464+
size_t len = strlen(val);
2465+
if (len >= buf_sz) {
2466+
len = buf_sz - SKIP_ONE;
2467+
}
2468+
for (size_t i = 0; i < len; i++) {
2469+
buf[i] = val[len - SKIP_ONE - i];
2470+
}
2471+
buf[len] = '\0';
2472+
return buf;
2473+
}
24302474
return val;
24312475
}
24322476

@@ -2952,10 +2996,7 @@ static const char *project_item(binding_t *b, cbm_return_item_t *item, char *fun
29522996
}
29532997
}
29542998
const char *raw = binding_get_virtual(b, item->variable, item->property);
2955-
if (item->func &&
2956-
(strcmp(item->func, "toLower") == 0 || strcmp(item->func, "toUpper") == 0 ||
2957-
strcmp(item->func, "toString") == 0 || strcmp(item->func, "toInteger") == 0 ||
2958-
strcmp(item->func, "toFloat") == 0 || strcmp(item->func, "toBoolean") == 0)) {
2999+
if (is_scalar_value_func(item->func)) {
29593000
return apply_string_func(item->func, raw, func_buf, buf_sz);
29603001
}
29613002
return raw;

tests/test_cypher.c

Lines changed: 44 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,23 @@ TEST(cypher_func_tointeger_tofloat) {
646646
PASS();
647647
}
648648

649+
TEST(cypher_func_size_reverse) {
650+
cbm_store_t *s = setup_cypher_store();
651+
cbm_cypher_result_t r = {0};
652+
int rc = cbm_cypher_execute(s,
653+
"MATCH (f:Function) WHERE f.name = \"LogError\" "
654+
"RETURN size(f.name), length(f.name), reverse(f.name)",
655+
"test", 0, &r);
656+
ASSERT_EQ(rc, 0);
657+
ASSERT_EQ(r.row_count, 1);
658+
ASSERT_STR_EQ(r.rows[0][0], "8"); /* "LogError" has 8 chars */
659+
ASSERT_STR_EQ(r.rows[0][1], "8");
660+
ASSERT_STR_EQ(r.rows[0][2], "rorrEgoL");
661+
cbm_cypher_result_free(&r);
662+
cbm_store_close(s);
663+
PASS();
664+
}
665+
649666
TEST(cypher_exec_calls_relationship) {
650667
cbm_store_t *s = setup_cypher_store();
651668
cbm_cypher_result_t r = {0};
@@ -918,25 +935,36 @@ TEST(cypher_exec_count_distinct_issue239) {
918935
PASS();
919936
}
920937

921-
/* issue #373: an unsupported computed expression in WITH/RETURN (function call
922-
* like split(...) or list indexing [..]) must not desync the parser into a
923-
* misleading default star projection (wrong columns f.name/.../.label, blank
924-
* rows). The requested column shape is preserved; the unsupported expression
925-
* projects empty while sibling aggregates still compute. */
926-
TEST(cypher_exec_unsupported_with_expr_resync_issue373) {
938+
/* issue #373: an unsupported computed expression in WITH/RETURN (an unknown
939+
* function like split(...) or list indexing [..]) must FAIL LOUDLY with a clear
940+
* "unsupported function" error rather than silently projecting an empty column
941+
* (which looks like a valid-but-blank result and hides the real problem). */
942+
TEST(cypher_exec_unsupported_func_errors_issue373) {
927943
cbm_store_t *s = setup_cypher_store();
928944

929945
cbm_cypher_result_t r = {0};
930946
int rc = cbm_cypher_execute(
931947
s, "MATCH (f:Function) WITH split(f.name)[0] AS top, count(*) AS c RETURN top, c", "test",
932948
0, &r);
933-
ASSERT_EQ(rc, 0);
934-
ASSERT_NULL(r.error);
935-
/* Columns are the requested aliases (top, c) — NOT a 3-column star
936-
* projection of f.name/f.qualified_name/f.label. */
937-
ASSERT_EQ(r.col_count, 2);
938-
ASSERT_STR_EQ(r.columns[0], "top");
939-
ASSERT_STR_EQ(r.columns[1], "c");
949+
ASSERT_TRUE(rc != 0); /* unsupported function now fails loudly */
950+
ASSERT_NOT_NULL(r.error);
951+
ASSERT_TRUE(strstr(r.error, "unsupported") != NULL);
952+
ASSERT_TRUE(strstr(r.error, "split") != NULL);
953+
cbm_cypher_result_free(&r);
954+
955+
cbm_store_close(s);
956+
PASS();
957+
}
958+
959+
/* A recognised function still works, and an unknown one in plain RETURN errors. */
960+
TEST(cypher_exec_unknown_func_return_errors) {
961+
cbm_store_t *s = setup_cypher_store();
962+
963+
cbm_cypher_result_t r = {0};
964+
int rc = cbm_cypher_execute(s, "MATCH (f:Function) RETURN nosuchfunc(f.name)", "test", 0, &r);
965+
ASSERT_TRUE(rc != 0);
966+
ASSERT_NOT_NULL(r.error);
967+
ASSERT_TRUE(strstr(r.error, "unsupported function") != NULL);
940968
cbm_cypher_result_free(&r);
941969

942970
cbm_store_close(s);
@@ -2430,6 +2458,7 @@ SUITE(cypher) {
24302458
RUN_TEST(cypher_func_keys);
24312459
RUN_TEST(cypher_func_properties);
24322460
RUN_TEST(cypher_func_tointeger_tofloat);
2461+
RUN_TEST(cypher_func_size_reverse);
24332462
RUN_TEST(cypher_exec_calls_relationship);
24342463
RUN_TEST(cypher_exec_calls_with_where);
24352464
RUN_TEST(cypher_exec_inbound);
@@ -2446,7 +2475,8 @@ SUITE(cypher) {
24462475
RUN_TEST(cypher_exec_where_label_test_issue241);
24472476
RUN_TEST(cypher_exec_label_alternation_issue242);
24482477
RUN_TEST(cypher_exec_count_distinct_issue239);
2449-
RUN_TEST(cypher_exec_unsupported_with_expr_resync_issue373);
2478+
RUN_TEST(cypher_exec_unsupported_func_errors_issue373);
2479+
RUN_TEST(cypher_exec_unknown_func_return_errors);
24502480
RUN_TEST(cypher_exec_inline_props);
24512481
RUN_TEST(cypher_parse_where_starts_with);
24522482
RUN_TEST(cypher_parse_where_contains);

0 commit comments

Comments
 (0)