diff --git a/.env.example b/.env.example index 70d2c439..7759231f 100644 --- a/.env.example +++ b/.env.example @@ -10,7 +10,7 @@ REDIS_ADDR=redis://redis:6379 # --- cubejs --- CUBESQL_DEBUG_QTRACE=true -CUBESTORE_VERSION=v1.3.23-arm64v8 +CUBESTORE_VERSION=v1.6.37 CUBEJS_URL=http://cubejs:4000 CUBEJS_SECRET=cubejsKey diff --git a/CLAUDE.md b/CLAUDE.md index 55cff4fa..d3793b7e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -96,15 +96,32 @@ Docker Compose files per environment: `docker-compose.dev.yml`, `docker-compose. ## Key File Locations -- `services/actions/src/rpc/` — 22 RPC handlers (one file per action) +- `services/actions/src/rpc/` — 25 RPC handlers (one file per action), includes `auditDataschemaDelete`, `auditVersionRollback`, `auditLogsRetention` - `services/cubejs/src/utils/driverFactory.js` — Database driver creation (25+ drivers) - `services/cubejs/src/utils/checkAuth.js` — JWT verification and security context setup - `services/cubejs/src/utils/defineUserScope.js` — Branch/version/access resolution - `services/cubejs/src/utils/buildSecurityContext.js` — Content-hashed context for cache isolation -- `services/cubejs/src/routes/` — 7 REST API endpoints (run-sql, test, get-schema, generate-models, etc.) 
+- `services/cubejs/src/utils/compilerCacheInvalidator.js` — Branch-scoped compiler-cache eviction (011-model-mgmt-api) +- `services/cubejs/src/utils/referenceScanner.js` — Seven-kind cross-cube reference detector (FR-008) +- `services/cubejs/src/utils/directVerifyAuth.js` — Shared direct-verify helper for branch-scoped routes +- `services/cubejs/src/utils/metaForBranch.js` — Helper that returns raw visibility-filtered metaConfig +- `services/cubejs/src/utils/auditWriter.js` — Best-effort audit-log writer with retry +- `services/cubejs/src/utils/errorCodes.js` — Canonical Model Management API error codes (FR-017) +- `services/cubejs/src/utils/requireOwnerOrAdmin.js` — Owner/admin team-role check +- `services/cubejs/src/utils/mapHasuraErrorCode.js` — Hasura extensions.code → stable error code +- `services/cubejs/src/utils/versionDiff.js` — Per-cube diff adapter over smart-generation/diffModels +- `services/cubejs/src/routes/` — 13 REST API endpoints, now including: + - `validateInBranch.js` (POST /api/v1/validate-in-branch, US1) + - `refreshCompiler.js` (POST /api/v1/internal/refresh-compiler, US2) + - `deleteDataschema.js` (DELETE /api/v1/dataschema/:id, US3) + - `metaSingleCube.js` (GET /api/v1/meta/cube/:cubeName, US4) + - `versionDiff.js` + `versionRollback.js` (POST /api/v1/version/{diff,rollback}, US5) - `services/hasura/metadata/actions.yaml` — GraphQL action definitions (maps to Actions RPC) - `services/hasura/metadata/tables.yaml` — Table definitions, relationships, and permissions -- `services/hasura/migrations/` — 96+ SQL migration directories +- `services/hasura/metadata/cron_triggers.yaml` — Cron triggers (now includes `audit_logs_retention_90d`) +- `services/hasura/migrations/` — 97+ SQL migration directories +- `scripts/lint-error-codes.mjs` — Fails CI when FR-017 error-code enum drifts across contracts +- `tests/workflows/model-management/` — StepCI workflows + SC-003 fixtures for all six endpoints ## Release Process @@ -273,6 +290,8 @@ These are the 
target patterns for adapting Synmetrix and client-v2: - N/A — no database schema changes. Query results are transient. (009-query-output) - JavaScript (ES modules), Node.js 22+ + Cube.js v1.6.x (CubeJS service), Express 4.18.2, `openai` npm v6.x (NEW), React 18 + Vite + Ant Design 5 (client-v2), URQL (GraphQL client) (010-dynamic-models-ii) - PostgreSQL via Hasura (versions, dataschemas), ClickHouse (profiling target — read-only) (010-dynamic-models-ii) +- JavaScript (ES modules), Node.js 22.x (already current in cubejs service after 003-update-deps) + `@cubejs-backend/schema-compiler` ^1.6.19 (existing; `prepareCompiler` powers validation), `@cubejs-backend/server-core` ^1.6.19 (existing; exposes `cubejs.compilerCache` LRU-cache), `@cubejs-backend/api-gateway` ^1.6.19 (existing; `getCompilerApi` + `filterVisibleItemsInMeta`), `jose` (existing; FraiOS/WorkOS JWT verification), Express 4.x (existing router). No new dependencies. (011-model-mgmt-api) +- PostgreSQL via Hasura (existing `dataschemas`, `versions`, `branches` tables — one new Hasura delete-permission migration on `dataschemas`). In-memory LRU compiler cache inside the cubejs process (existing). No new tables. (011-model-mgmt-api) ## Recent Changes - 001-dev-environment: Added TypeScript (ES2022, Node16 modules) — matches + oclif (CLI framework), zx (shell execution) diff --git a/scripts/lint-error-codes.mjs b/scripts/lint-error-codes.mjs new file mode 100644 index 00000000..5c175ecd --- /dev/null +++ b/scripts/lint-error-codes.mjs @@ -0,0 +1,161 @@ +#!/usr/bin/env node + +/** + * lint-error-codes — FR-017 guard. + * + * Fails with a non-zero exit code (and a human-readable diff) whenever the + * `ErrorCode` enum drifts across: + * - services/cubejs/src/utils/errorCodes.js (single source of truth) + * - specs/011-model-mgmt-api/contracts/*.yaml (every OpenAPI contract) + * + * Every contract under `contracts/` must contain a top-level `ErrorCode` + * schema with an explicit `enum` array. 
Each enum must match the exhaustive + * list of values in `errorCodes.js`, so clients generating bindings from any + * single contract receive the complete enum. + */ + +import { readFile, readdir } from "node:fs/promises"; +import { fileURLToPath } from "node:url"; +import { dirname, join } from "node:path"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const repoRoot = join(__dirname, ".."); + +const ERROR_CODES_JS = join( + repoRoot, + "services/cubejs/src/utils/errorCodes.js" +); +const CONTRACTS_DIR = join( + repoRoot, + "specs/011-model-mgmt-api/contracts" +); + +async function readErrorCodesJs() { + const text = await readFile(ERROR_CODES_JS, "utf8"); + const re = /["']([a-z][a-z0-9_]+)["']/g; + const known = new Set(); + let m; + while ((m = re.exec(text))) { + const v = m[1]; + if ( + /_(?:invalid|not_found|unresolved|not_visible|unauthorized|by_references|historical_version|authorization|cross_branch|not_on_branch|columns_missing)_?/.test( + v + ) || + /_reference$/.test(v) || + v === "cube_not_found" || + v === "validate_invalid_mode" || + v === "validate_target_not_found" || + v === "validate_unresolved_reference" || + v === "refresh_branch_not_visible" || + v === "refresh_unauthorized" || + v === "delete_blocked_by_references" || + v === "delete_blocked_historical_version" || + v === "delete_blocked_authorization" || + v === "diff_cross_branch" || + v === "diff_invalid_request" || + v === "rollback_version_not_on_branch" || + v === "rollback_blocked_authorization" || + v === "rollback_invalid_request" || + v === "rollback_source_columns_missing" + ) { + known.add(v); + } + } + return known; +} + +async function readContractEnum(path) { + const text = await readFile(path, "utf8"); + // Locate the ErrorCode schema's enum list. Permissive regex: we want every + // value inside the first `enum:` block that follows `ErrorCode:`. 
+ const anchor = text.indexOf("ErrorCode:"); + if (anchor === -1) { + throw new Error(`${path} has no ErrorCode schema`); + } + const afterAnchor = text.slice(anchor); + // Find the enum block: `enum:` → dashed list → end when we hit a line that + // starts with non-dash non-whitespace content at the same or lower indent. + const enumAnchor = afterAnchor.search(/\n\s*enum:\s*\n/); + if (enumAnchor === -1) { + throw new Error(`${path} ErrorCode.enum block not found`); + } + const afterEnum = afterAnchor.slice(enumAnchor).split("\n"); + // Skip the `enum:` line itself. + const values = []; + let inEnum = false; + for (const raw of afterEnum) { + if (!inEnum) { + if (/^\s*enum:\s*$/.test(raw)) inEnum = true; + continue; + } + const trimmed = raw.trim(); + if (trimmed === "") continue; + if (trimmed.startsWith("- ")) { + const v = trimmed.slice(2).replace(/['"]/g, "").trim(); + if (v) values.push(v); + continue; + } + // First non-dash non-empty line after the list ends the enum block. + break; + } + return new Set(values); +} + +function setDiff(a, b) { + const out = []; + for (const v of a) if (!b.has(v)) out.push(v); + return out; +} + +async function main() { + const sourceOfTruth = await readErrorCodesJs(); + if (sourceOfTruth.size === 0) { + console.error("lint-error-codes: errorCodes.js produced an empty set"); + process.exit(1); + } + + const entries = (await readdir(CONTRACTS_DIR)).filter((f) => + f.endsWith(".yaml") + ); + if (entries.length === 0) { + console.error(`lint-error-codes: no contracts found in ${CONTRACTS_DIR}`); + process.exit(1); + } + + let failed = false; + for (const entry of entries) { + const path = join(CONTRACTS_DIR, entry); + let contractSet; + try { + contractSet = await readContractEnum(path); + } catch (err) { + console.error(`lint-error-codes: ${entry} → ${err.message}`); + failed = true; + continue; + } + const missing = setDiff(sourceOfTruth, contractSet); + const extra = setDiff(contractSet, sourceOfTruth); + if (missing.length 
|| extra.length) { + failed = true; + console.error(`lint-error-codes: ${entry} drift detected`); + if (missing.length) { + console.error(` MISSING (in errorCodes.js, not in ${entry}):`); + for (const v of missing) console.error(` - ${v}`); + } + if (extra.length) { + console.error(` EXTRA (in ${entry}, not in errorCodes.js):`); + for (const v of extra) console.error(` - ${v}`); + } + } + } + + if (failed) process.exit(1); + console.log( + `lint-error-codes: OK (${sourceOfTruth.size} codes, ${entries.length} contracts)` + ); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/services/actions/src/rpc/__tests__/auditDataschemaDelete.test.js b/services/actions/src/rpc/__tests__/auditDataschemaDelete.test.js new file mode 100644 index 00000000..e3d3729a --- /dev/null +++ b/services/actions/src/rpc/__tests__/auditDataschemaDelete.test.js @@ -0,0 +1,53 @@ +import { describe, it, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; + +let handler; + +describe("auditDataschemaDelete RPC", () => { + let originalError; + + beforeEach(async () => { + originalError = console.error; + console.error = () => {}; + ({ default: handler } = await import("../auditDataschemaDelete.js")); + }); + + afterEach(() => { + console.error = originalError; + }); + + it("rejects payloads missing event.data.old.id", async () => { + const res = await handler({}, { event: { data: { old: null } } }); + assert.equal(res.ok, false); + assert.match(res.error, /no event.data.old payload/); + }); + + it("falls through to fetchGraphQL and returns a structured error when HASURA is unreachable", async () => { + const prev = process.env.HASURA_ENDPOINT; + process.env.HASURA_ENDPOINT = + "http://127.0.0.1:1/unreachable-audit-test"; + try { + const res = await handler( + { "x-hasura-user-id": "user-1" }, + { + event: { + data: { + old: { + id: "00000000-0000-4000-8000-000000000001", + datasource_id: "00000000-0000-4000-8000-000000000010", + 
version_id: "00000000-0000-4000-8000-000000000020", + user_id: "user-1", + }, + }, + session_variables: { "x-hasura-user-id": "user-1" }, + }, + } + ); + assert.equal(res.ok, false); + assert.ok(typeof res.error === "string" && res.error.length > 0); + } finally { + if (prev == null) delete process.env.HASURA_ENDPOINT; + else process.env.HASURA_ENDPOINT = prev; + } + }); +}); diff --git a/services/actions/src/rpc/__tests__/auditLogsRetention.test.js b/services/actions/src/rpc/__tests__/auditLogsRetention.test.js new file mode 100644 index 00000000..3990974a --- /dev/null +++ b/services/actions/src/rpc/__tests__/auditLogsRetention.test.js @@ -0,0 +1,35 @@ +import { describe, it, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; + +let handler; + +describe("auditLogsRetention RPC", () => { + let originalError; + + beforeEach(async () => { + originalError = console.error; + console.error = () => {}; + ({ default: handler } = await import("../auditLogsRetention.js")); + }); + + afterEach(() => { + console.error = originalError; + }); + + it("returns a structured error when Hasura is unreachable", async () => { + const prev = process.env.HASURA_ENDPOINT; + process.env.HASURA_ENDPOINT = + "http://127.0.0.1:1/unreachable-retention-test"; + try { + const res = await handler(); + assert.ok(res); + assert.ok( + typeof res.error === "string" && res.error.length > 0, + "expected structured error on unreachable Hasura" + ); + } finally { + if (prev == null) delete process.env.HASURA_ENDPOINT; + else process.env.HASURA_ENDPOINT = prev; + } + }); +}); diff --git a/services/actions/src/rpc/__tests__/auditVersionRollback.test.js b/services/actions/src/rpc/__tests__/auditVersionRollback.test.js new file mode 100644 index 00000000..1738f9b5 --- /dev/null +++ b/services/actions/src/rpc/__tests__/auditVersionRollback.test.js @@ -0,0 +1,64 @@ +import { describe, it, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; + 
import { fetchGraphQL } from "../utils/graphql.js";

const INSERT_AUDIT = `
  mutation InsertAuditLog(
    $action: String!
    $user_id: uuid!
    $datasource_id: uuid
    $branch_id: uuid
    $target_id: uuid!
    $outcome: String!
    $error_code: String
    $payload: jsonb
  ) {
    insert_audit_logs_one(object: {
      action: $action,
      user_id: $user_id,
      datasource_id: $datasource_id,
      branch_id: $branch_id,
      target_id: $target_id,
      outcome: $outcome,
      error_code: $error_code,
      payload: $payload
    }) {
      id
    }
  }
`;

const BRANCH_QUERY = `
  query BranchForVersion($id: uuid!) {
    versions_by_pk(id: $id) {
      id
      branch_id
    }
  }
`;

/**
 * audit_dataschema_delete — Hasura event trigger handler.
 *
 * Fires AFTER a `dataschemas.delete` commits. Writes the `outcome='success'`
 * audit row for FR-016. The handler-level failure paths in the CubeJS route
 * `deleteDataschema.js` write the matching `outcome='failure'` rows
 * directly — this handler covers only the success path (trigger only fires
 * on committed deletes).
 *
 * @param {object} session fallback session-variable bag (used when the event
 *   payload carries no session_variables)
 * @param {object} input   Hasura event-trigger payload
 * @returns {Promise<{ok: boolean, auditLogId?: string, error?: string}>}
 */
export default async function auditDataschemaDelete(session, input) {
  const deleted = input?.event?.data?.old;
  if (!deleted?.id) {
    return { ok: false, error: "no event.data.old payload" };
  }

  // Prefer the event's session variables; fall back to the handler session,
  // then to the deleted row's own user_id.
  const vars = input?.event?.session_variables || session || {};
  const userId =
    vars["x-hasura-user-id"] ||
    vars["X-Hasura-User-Id"] ||
    deleted.user_id ||
    null;

  // Resolve branch_id from the deleted row's version_id.
  let branchId = null;
  if (deleted.version_id) {
    try {
      const lookup = await fetchGraphQL(BRANCH_QUERY, {
        id: deleted.version_id,
      });
      branchId = lookup?.data?.versions_by_pk?.branch_id || null;
    } catch {
      // non-fatal — the audit row tolerates a null branch_id
    }
  }

  try {
    const result = await fetchGraphQL(INSERT_AUDIT, {
      action: "dataschema_delete",
      user_id: userId,
      datasource_id: deleted.datasource_id || null,
      branch_id: branchId,
      target_id: deleted.id,
      outcome: "success",
      error_code: null,
      payload: deleted,
    });
    return {
      ok: true,
      auditLogId: result?.data?.insert_audit_logs_one?.id,
    };
  } catch (err) {
    return { ok: false, error: err?.message || String(err) };
  }
}
import { fetchGraphQL } from "../utils/graphql.js";

const INSERT_AUDIT = `
  mutation InsertAuditLog(
    $action: String!
    $user_id: uuid!
    $datasource_id: uuid
    $branch_id: uuid
    $target_id: uuid!
    $outcome: String!
    $error_code: String
    $payload: jsonb
  ) {
    insert_audit_logs_one(object: {
      action: $action,
      user_id: $user_id,
      datasource_id: $datasource_id,
      branch_id: $branch_id,
      target_id: $target_id,
      outcome: $outcome,
      error_code: $error_code,
      payload: $payload
    }) {
      id
    }
  }
`;

const BRANCH_DATASOURCE_QUERY = `
  query BranchWithDatasource($id: uuid!) {
    branches_by_pk(id: $id) {
      id
      datasource_id
    }
  }
`;

/**
 * audit_version_rollback — Hasura event trigger handler.
 *
 * Fires AFTER a `versions.insert` commits. Because the trigger in
 * tables.yaml matches `insert with columns: '*'`, we filter here for
 * `origin = 'rollback'` to ignore normal authoring/smart_gen inserts.
 *
 * @param {object} session fallback session-variable bag
 * @param {object} input   Hasura event-trigger payload
 * @returns {Promise<{ok: boolean, skipped?: boolean, auditLogId?: string, error?: string}>}
 */
export default async function auditVersionRollback(session, input) {
  const inserted = input?.event?.data?.new;
  if (!inserted?.id || inserted.origin !== "rollback") {
    return { ok: true, skipped: true };
  }

  const vars = input?.event?.session_variables || session || {};
  const userId =
    vars["x-hasura-user-id"] ||
    vars["X-Hasura-User-Id"] ||
    inserted.user_id ||
    null;

  // Resolve datasource_id from the version's branch.
  let datasourceId = null;
  if (inserted.branch_id) {
    try {
      const lookup = await fetchGraphQL(BRANCH_DATASOURCE_QUERY, {
        id: inserted.branch_id,
      });
      datasourceId = lookup?.data?.branches_by_pk?.datasource_id || null;
    } catch {
      // non-fatal
    }
  }

  try {
    const result = await fetchGraphQL(INSERT_AUDIT, {
      action: "version_rollback",
      user_id: userId,
      datasource_id: datasourceId,
      branch_id: inserted.branch_id || null,
      target_id: inserted.id,
      outcome: "success",
      error_code: null,
      payload: { origin: inserted.origin, checksum: inserted.checksum },
    });
    return {
      ok: true,
      auditLogId: result?.data?.insert_audit_logs_one?.id,
    };
  } catch (err) {
    return { ok: false, error: err?.message || String(err) };
  }
}
"@cubejs-backend/databricks-jdbc-driver": "^1.6.19", - "@cubejs-backend/dremio-driver": "^1.6.19", - "@cubejs-backend/druid-driver": "^1.6.19", - "@cubejs-backend/duckdb-driver": "^1.6.19", - "@cubejs-backend/elasticsearch-driver": "^1.6.19", - "@cubejs-backend/firebolt-driver": "^1.6.19", - "@cubejs-backend/hive-driver": "^1.6.19", - "@cubejs-backend/jdbc-driver": "^1.6.19", - "@cubejs-backend/ksql-driver": "^1.6.19", - "@cubejs-backend/materialize-driver": "^1.6.19", - "@cubejs-backend/mongobi-driver": "^1.6.19", - "@cubejs-backend/mssql-driver": "^1.6.19", - "@cubejs-backend/mysql-driver": "^1.6.19", - "@cubejs-backend/oracle-driver": "^1.6.19", - "@cubejs-backend/pinot-driver": "^1.6.19", - "@cubejs-backend/postgres-driver": "^1.6.19", - "@cubejs-backend/prestodb-driver": "^1.6.19", - "@cubejs-backend/query-orchestrator": "^1.6.19", - "@cubejs-backend/questdb-driver": "^1.6.19", - "@cubejs-backend/redshift-driver": "^1.6.19", - "@cubejs-backend/server-core": "^1.6.19", - "@cubejs-backend/snowflake-driver": "^1.6.19", - "@cubejs-backend/sqlite-driver": "^1.6.19", - "@cubejs-backend/trino-driver": "^1.6.19", + "@cubejs-backend/api-gateway": "^1.6.37", + "@cubejs-backend/athena-driver": "^1.6.37", + "@cubejs-backend/bigquery-driver": "^1.6.37", + "@cubejs-backend/clickhouse-driver": "^1.6.37", + "@cubejs-backend/crate-driver": "^1.6.37", + "@cubejs-backend/cubestore-driver": "^1.6.37", + "@cubejs-backend/databricks-jdbc-driver": "^1.6.37", + "@cubejs-backend/dremio-driver": "^1.6.37", + "@cubejs-backend/druid-driver": "^1.6.37", + "@cubejs-backend/duckdb-driver": "^1.6.37", + "@cubejs-backend/elasticsearch-driver": "^1.6.37", + "@cubejs-backend/firebolt-driver": "^1.6.37", + "@cubejs-backend/hive-driver": "^1.6.37", + "@cubejs-backend/jdbc-driver": "^1.6.37", + "@cubejs-backend/ksql-driver": "^1.6.37", + "@cubejs-backend/materialize-driver": "^1.6.37", + "@cubejs-backend/mongobi-driver": "^1.6.37", + "@cubejs-backend/mssql-driver": "^1.6.37", + 
"@cubejs-backend/mysql-driver": "^1.6.37", + "@cubejs-backend/oracle-driver": "^1.6.37", + "@cubejs-backend/pinot-driver": "^1.6.37", + "@cubejs-backend/postgres-driver": "^1.6.37", + "@cubejs-backend/prestodb-driver": "^1.6.37", + "@cubejs-backend/query-orchestrator": "^1.6.37", + "@cubejs-backend/questdb-driver": "^1.6.37", + "@cubejs-backend/redshift-driver": "^1.6.37", + "@cubejs-backend/server-core": "^1.6.37", + "@cubejs-backend/snowflake-driver": "^1.6.37", + "@cubejs-backend/sqlite-driver": "^1.6.37", + "@cubejs-backend/trino-driver": "^1.6.37", "@cubejs-backend/vertica-driver": "npm:@knowitall/vertica-driver@^0.32.3", "apache-arrow": "^21.1.0", "body-parser": "^1.19.0", diff --git a/services/cubejs/src/routes/__tests__/metaAll.test.js b/services/cubejs/src/routes/__tests__/metaAll.test.js new file mode 100644 index 00000000..03bc0c67 --- /dev/null +++ b/services/cubejs/src/routes/__tests__/metaAll.test.js @@ -0,0 +1,86 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +import { summarizeCube } from "../metaAll.js"; + +const ds = { + id: "ds-1", + name: "prod", + db_type: "clickhouse", + team_id: "team-1", +}; + +describe("summarizeCube — T013h dataschema_id / file_name enrichment", () => { + it("populates dataschema_id + file_name when the cube name maps to a dataschema row", () => { + const dataschemaByCubeName = new Map([ + ["orders", { id: "ds-row-1", name: "orders.yml" }], + ["customers", { id: "ds-row-2", name: "customers.yml" }], + ]); + const compiled = { + config: { + name: "orders", + title: "Orders", + public: true, + measures: [{ name: "count" }], + dimensions: [{ name: "id" }], + segments: [], + meta: null, + }, + }; + const out = summarizeCube( + compiled, + ds, + "branch-1", + "ver-1", + dataschemaByCubeName + ); + assert.equal(out.dataschema_id, "ds-row-1"); + assert.equal(out.file_name, "orders.yml"); + assert.equal(out.name, "orders"); + }); + + it("returns null when the cube name has no backing 
dataschema (synthetic cube)", () => { + const compiled = { + config: { + name: "synthetic_cube", + public: true, + measures: [], + dimensions: [], + segments: [], + }, + }; + const out = summarizeCube( + compiled, + ds, + "branch-1", + "ver-1", + new Map() + ); + assert.equal(out.dataschema_id, null); + assert.equal(out.file_name, null); + }); + + it("returns null when the cube name does not match any declared cube", () => { + const dataschemaByCubeName = new Map([ + ["orders", { id: "ds-row-1", name: "orders.yml" }], + ]); + const compiled = { + config: { + name: "ghosts", + public: true, + measures: [], + dimensions: [], + segments: [], + }, + }; + const out = summarizeCube( + compiled, + ds, + "branch-1", + "ver-1", + dataschemaByCubeName + ); + assert.equal(out.dataschema_id, null); + assert.equal(out.file_name, null); + }); +}); diff --git a/services/cubejs/src/routes/__tests__/validateInBranch.corpus.test.js b/services/cubejs/src/routes/__tests__/validateInBranch.corpus.test.js new file mode 100644 index 00000000..bc32cd78 --- /dev/null +++ b/services/cubejs/src/routes/__tests__/validateInBranch.corpus.test.js @@ -0,0 +1,85 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +import { readFile, readdir } from "node:fs/promises"; +import { fileURLToPath } from "node:url"; +import { dirname, join } from "node:path"; +import YAML from "yaml"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const FIXTURE_DIR = join( + __dirname, + "../../../../../tests/workflows/model-management/fixtures" +); + +const EXPECTED = new Set([ + "valid-append.yml", + "dangling-join.yml", + "circular-extends.yml", + "measure-to-measure-typo.yml", + "preagg-reference-break.yml", + "filter-params-orphan.yml", +]); + +const VALID_MODES = new Set(["append", "replace", "preview-delete"]); +const VALID_ERROR_CODES = new Set([ + null, + "validate_unresolved_reference", + "delete_blocked_by_references", +]); +const VALID_REFERENCE_KINDS = 
new Set([ + null, + "joins", + "extends", + "sub_query", + "formula", + "segment", + "pre_aggregation", + "filter_params", +]); + +describe("SC-003 fixture corpus", () => { + it("discovers all six expected fixtures", async () => { + const entries = await readdir(FIXTURE_DIR); + const yaml = entries.filter((n) => n.endsWith(".yml")); + for (const name of EXPECTED) { + assert.ok( + yaml.includes(name), + `missing fixture: ${name}` + ); + } + }); + + for (const fixtureName of EXPECTED) { + it(`${fixtureName} — well-formed shape`, async () => { + const raw = await readFile(join(FIXTURE_DIR, fixtureName), "utf8"); + const doc = YAML.parse(raw); + assert.equal(typeof doc.name, "string"); + assert.ok(VALID_MODES.has(doc.mode), `bad mode: ${doc.mode}`); + assert.ok(Array.isArray(doc.branchSeed)); + assert.ok(doc.branchSeed.length >= 1); + for (const seed of doc.branchSeed) { + assert.equal(typeof seed.file, "string"); + assert.equal(typeof seed.code, "string"); + } + if (doc.mode === "append" || doc.mode === "replace") { + assert.ok(doc.draft, "draft required for append/replace"); + assert.equal(typeof doc.draft.fileName, "string"); + assert.equal(typeof doc.draft.content, "string"); + } + if (doc.mode === "replace" || doc.mode === "preview-delete") { + assert.equal(typeof doc.targetCube, "string"); + } + assert.ok(doc.expectedOutcome); + assert.equal(typeof doc.expectedOutcome.valid, "boolean"); + assert.ok( + VALID_ERROR_CODES.has(doc.expectedOutcome.errorCode), + `bad errorCode: ${doc.expectedOutcome.errorCode}` + ); + assert.ok( + VALID_REFERENCE_KINDS.has(doc.expectedOutcome.referenceKind), + `bad referenceKind: ${doc.expectedOutcome.referenceKind}` + ); + }); + } +}); diff --git a/services/cubejs/src/routes/deleteDataschema.js b/services/cubejs/src/routes/deleteDataschema.js new file mode 100644 index 00000000..b56fa09c --- /dev/null +++ b/services/cubejs/src/routes/deleteDataschema.js @@ -0,0 +1,377 @@ +import YAML from "yaml"; + +import { verifyAndProvision } from 
"../utils/directVerifyAuth.js"; +import { findUser } from "../utils/dataSourceHelpers.js"; +import { fetchGraphQL } from "../utils/graphql.js"; +import { mintHasuraToken } from "../utils/mintHasuraToken.js"; +import { mintedTokenCache } from "../utils/mintedTokenCache.js"; +import { requireOwnerOrAdmin } from "../utils/requireOwnerOrAdmin.js"; +import { resolvePartitionTeamIds } from "./discover.js"; +import { scanCrossCubeReferences } from "../utils/referenceScanner.js"; +import { writeAuditLog } from "../utils/auditWriter.js"; +import { mapHasuraErrorCode } from "../utils/mapHasuraErrorCode.js"; +import { ErrorCode } from "../utils/errorCodes.js"; +import { parseCubesFromJs } from "../utils/smart-generation/diffModels.js"; + +const RESOLVE_TARGET_QUERY = ` + query ResolveTargetDataschema($id: uuid!) { + dataschemas_by_pk(id: $id) { + id + name + code + version_id + version { + id + is_current + branch { + id + status + datasource { + id + team_id + } + } + } + } + } +`; + +const SIBLINGS_QUERY = ` + query Siblings($versionId: uuid!, $excludeId: uuid!) { + dataschemas( + where: { + version_id: {_eq: $versionId} + id: {_neq: $excludeId} + } + ) { + id + name + code + } + } +`; + +const DELETE_MUTATION = ` + mutation DeleteDataschema($id: uuid!) { + delete_dataschemas_by_pk(id: $id) { + id + } + } +`; + +function parseCubes(name, code) { + if (!code) return []; + const isYaml = name?.endsWith(".yml") || name?.endsWith(".yaml"); + try { + if (isYaml) { + const parsed = YAML.parse(code); + return Array.isArray(parsed?.cubes) ? parsed.cubes : []; + } + const cubes = parseCubesFromJs(code); + return Array.isArray(cubes) ? 
cubes : []; + } catch { + return []; + } +} + +async function ensureHasuraTokenForUser(userId) { + let tok = mintedTokenCache.get(userId); + if (tok) return tok; + tok = await mintHasuraToken(userId); + const decoded = JSON.parse( + Buffer.from(tok.split(".")[1], "base64url").toString() + ); + mintedTokenCache.set(userId, tok, decoded.exp); + return tok; +} + +function respondError(res, status, code, message, extra = {}) { + return res.status(status).json({ code, message, ...extra }); +} + +/** + * DELETE /api/v1/dataschema/:dataschemaId + * + * Remove a dataschema row from the currently-active version of its branch. + * Enforces, in order: + * - authentication (FR-015 direct-verify) + * - partition gate (FR-015) + * - owner/admin role on the datasource's team (FR-015) + * - version-level immutability via `is_current=true` + `branch.status=active` (FR-007) + * - cross-cube reference scan (FR-008, seven kinds) + * + * Every rejection path emits a durable audit row with `outcome='failure'` via + * `writeAuditLog` (FR-016). Successful deletes are captured by the + * `delete_dataschema_audit` Hasura event trigger. + */ +export default async function deleteDataschema(req, res) { + const verified = await verifyAndProvision(req); + if (verified.error) { + return respondError( + res, + verified.error.status, + verified.error.code, + verified.error.message + ); + } + const { payload, userId } = verified; + + const dataschemaId = req.params?.dataschemaId; + if (!dataschemaId || typeof dataschemaId !== "string") { + return respondError( + res, + 400, + "delete_invalid_request", + "dataschemaId path parameter is required" + ); + } + + // Resolve the target via admin-secret GraphQL (handler owns enforcement). 
+ let targetRow; + try { + const r = await fetchGraphQL(RESOLVE_TARGET_QUERY, { id: dataschemaId }); + targetRow = r?.data?.dataschemas_by_pk; + } catch (err) { + return respondError( + res, + 503, + "hasura_unavailable", + err?.message || "Hasura unavailable" + ); + } + + if (!targetRow) { + return respondError( + res, + 404, + ErrorCode.VALIDATE_TARGET_NOT_FOUND, + "Dataschema not found" + ); + } + + const version = targetRow.version; + const branch = version?.branch; + const datasource = branch?.datasource; + const teamId = datasource?.team_id; + const datasourceId = datasource?.id; + const branchId = branch?.id; + + const user = await findUser({ userId }); + + // Partition gate. + const partitionTeamIds = resolvePartitionTeamIds( + user.members, + payload.partition + ); + if (partitionTeamIds && !partitionTeamIds.has(teamId)) { + await writeAuditLog({ + action: "dataschema_delete", + userId, + datasourceId, + branchId, + targetId: dataschemaId, + outcome: "failure", + errorCode: ErrorCode.DELETE_BLOCKED_AUTHORIZATION, + payload: { reason: "partition_mismatch" }, + }); + return respondError( + res, + 403, + ErrorCode.DELETE_BLOCKED_AUTHORIZATION, + "Caller's partition does not match the datasource's team" + ); + } + + // Owner/admin gate. + if (!requireOwnerOrAdmin(user, teamId)) { + await writeAuditLog({ + action: "dataschema_delete", + userId, + datasourceId, + branchId, + targetId: dataschemaId, + outcome: "failure", + errorCode: ErrorCode.DELETE_BLOCKED_AUTHORIZATION, + payload: { reason: "insufficient_role" }, + }); + return respondError( + res, + 403, + ErrorCode.DELETE_BLOCKED_AUTHORIZATION, + "Owner or admin role required" + ); + } + + // Version-level immutability (FR-007): only the current version of the + // active branch may be edited. 
+ if (version?.is_current !== true || branch?.status !== "active") { + await writeAuditLog({ + action: "dataschema_delete", + userId, + datasourceId, + branchId, + targetId: dataschemaId, + outcome: "failure", + errorCode: ErrorCode.DELETE_BLOCKED_HISTORICAL_VERSION, + payload: { + is_current: version?.is_current ?? null, + branch_status: branch?.status ?? null, + }, + }); + return respondError( + res, + 409, + ErrorCode.DELETE_BLOCKED_HISTORICAL_VERSION, + "Dataschema is attached to a historical version — only the current version of the active branch is mutable" + ); + } + + // Cross-cube reference scan (FR-008). + let siblings; + try { + const r = await fetchGraphQL(SIBLINGS_QUERY, { + versionId: version.id, + excludeId: dataschemaId, + }); + siblings = r?.data?.dataschemas || []; + } catch (err) { + return respondError( + res, + 503, + "hasura_unavailable", + err?.message || "Hasura unavailable" + ); + } + + const targetCubeNames = parseCubes(targetRow.name, targetRow.code).map( + (c) => c.name + ); + const otherCubes = siblings.flatMap((row) => + parseCubes(row.name, row.code).map((c) => ({ + cubeName: c.name, + fileName: row.name, + code: row.code, + })) + ); + + const blockingReferences = []; + for (const name of targetCubeNames) { + for (const ref of scanCrossCubeReferences(name, otherCubes)) { + blockingReferences.push(ref); + } + } + + if (blockingReferences.length > 0) { + await writeAuditLog({ + action: "dataschema_delete", + userId, + datasourceId, + branchId, + targetId: dataschemaId, + outcome: "failure", + errorCode: ErrorCode.DELETE_BLOCKED_BY_REFERENCES, + payload: { blockingReferences }, + }); + return respondError( + res, + 409, + ErrorCode.DELETE_BLOCKED_BY_REFERENCES, + "Cube is referenced by another cube on the same branch", + { blockingReferences } + ); + } + + // Fire the actual delete with the caller's minted Hasura token so the + // user-role delete_permissions filter applies at the DB layer too (two-layer + // defence per research R4). 
+ let hasuraToken; + try { + hasuraToken = await ensureHasuraTokenForUser(userId); + } catch { + return respondError( + res, + 503, + "auth_unavailable", + "Unable to mint Hasura token" + ); + } + + let del; + try { + del = await fetchGraphQL( + DELETE_MUTATION, + { id: dataschemaId }, + hasuraToken, + { preserveErrors: true } + ); + } catch (err) { + return respondError( + res, + 503, + "hasura_unavailable", + err?.message || "Hasura unavailable" + ); + } + + if (del?.errors) { + const mapped = mapHasuraErrorCode(del.errors, { action: "delete" }); + if (mapped === ErrorCode.DELETE_BLOCKED_AUTHORIZATION) { + await writeAuditLog({ + action: "dataschema_delete", + userId, + datasourceId, + branchId, + targetId: dataschemaId, + outcome: "failure", + errorCode: mapped, + payload: { hasura_code: del.errors?.[0]?.extensions?.code || null }, + }); + return respondError( + res, + 403, + mapped, + "Hasura rejected the delete (permission-error)" + ); + } + await writeAuditLog({ + action: "dataschema_delete", + userId, + datasourceId, + branchId, + targetId: dataschemaId, + outcome: "failure", + errorCode: "hasura_rejected", + payload: { errors: del.errors }, + }); + return respondError( + res, + 503, + "hasura_unavailable", + "Hasura rejected the delete" + ); + } + + if (!del?.data?.delete_dataschemas_by_pk?.id) { + // Row vanished concurrently — treat as not-found and audit for visibility. + await writeAuditLog({ + action: "dataschema_delete", + userId, + datasourceId, + branchId, + targetId: dataschemaId, + outcome: "failure", + errorCode: ErrorCode.VALIDATE_TARGET_NOT_FOUND, + payload: { reason: "row_not_found_at_delete" }, + }); + return respondError( + res, + 404, + ErrorCode.VALIDATE_TARGET_NOT_FOUND, + "Dataschema not found at delete time" + ); + } + + // Success path: the Hasura delete event trigger `delete_dataschema_audit` + // writes the outcome='success' audit row. Handler does not duplicate. 
+ return res.json({ deleted: true, dataschemaId }); +} diff --git a/services/cubejs/src/routes/discover.js b/services/cubejs/src/routes/discover.js index 330bd301..fc95864a 100644 --- a/services/cubejs/src/routes/discover.js +++ b/services/cubejs/src/routes/discover.js @@ -184,6 +184,18 @@ export default async function discover(req, res) { "Discover nested/array columns in a datasource.", "POST /api/v1/validate": "Validate a Cube.js model file.", + "POST /api/v1/validate-in-branch": + "Validate a draft dataschema file in the context of a branch's deployed cubes (Model Management API). Modes: append | replace | preview-delete. Direct-verify auth — does NOT require x-hasura-datasource-id.", + "POST /api/v1/internal/refresh-compiler": + "Invalidate the compiler cache for a branch's dataschemas so the next metadata/query call recompiles. Owner/admin only. Direct-verify auth — does NOT require x-hasura-datasource-id.", + "DELETE /api/v1/dataschema/:dataschemaId": + "Delete a dataschema from the current version of the active branch. Blocks on cross-cube references (FR-008, seven kinds). Owner/admin only. Direct-verify auth — does NOT require x-hasura-datasource-id.", + "GET /api/v1/meta/cube/:cubeName": + "Return the compiled metadata envelope for a single cube on the selected branch. Requires x-hasura-datasource-id; optional x-hasura-branch-id.", + "POST /api/v1/version/diff": + "Diff two versions on the same branch (added/removed/modified cubes). Direct-verify auth — does NOT require x-hasura-datasource-id.", + "POST /api/v1/version/rollback": + "Roll a branch back to a prior version by inserting a new version whose dataschemas are clones of the target. Owner/admin only. 
Direct-verify auth — does NOT require x-hasura-datasource-id.", "GET /api/v1/pre-aggregations": "List pre-aggregations for the datasource.", "POST /api/v1/pre-aggregation-preview": diff --git a/services/cubejs/src/routes/index.js b/services/cubejs/src/routes/index.js index a23dbbe7..60f87c29 100644 --- a/services/cubejs/src/routes/index.js +++ b/services/cubejs/src/routes/index.js @@ -1,5 +1,14 @@ import express from "express"; +// Model Management API (feature 011-model-mgmt-api) adds six routes registered +// below — each handler owns its own auth where indicated: +// POST /api/v1/validate-in-branch (direct-verify, US1) +// POST /api/v1/internal/refresh-compiler (direct-verify, US2) +// DELETE /api/v1/dataschema/:dataschemaId (direct-verify, US3) +// GET /api/v1/meta/cube/:cubeName (checkAuthMiddleware, US4) +// POST /api/v1/version/diff (direct-verify, US5) +// POST /api/v1/version/rollback (direct-verify, US5) + import checkAuthMiddleware from "../utils/checkAuth.js"; import { invalidateUserCache, @@ -30,7 +39,13 @@ import discoverNested from "./discoverNested.js"; import discover from "./discover.js"; import metaAll from "./metaAll.js"; import testConnection from "./testConnection.js"; +import deleteDataschema from "./deleteDataschema.js"; +import metaSingleCube from "./metaSingleCube.js"; +import refreshCompiler from "./refreshCompiler.js"; import validate from "./validate.js"; +import validateInBranch from "./validateInBranch.js"; +import versionDiff from "./versionDiff.js"; +import versionRollback from "./versionRollback.js"; import version from "./version.js"; const router = express.Router(); @@ -275,6 +290,41 @@ export default ({ basePath, cubejs }) => { metaAll(req, res, cubejs) ); + // Model Management API: contextual validation (US1). + // Direct-verify auth — NOT behind checkAuthMiddleware (no x-hasura-datasource-id header). 
+ router.post(`${basePath}/v1/validate-in-branch`, async (req, res) => + validateInBranch(req, res) + ); + + // Model Management API: compiler-cache refresh (US2). Owner/admin only. + router.post(`${basePath}/v1/internal/refresh-compiler`, async (req, res) => + refreshCompiler(req, res, cubejs) + ); + + // Model Management API: delete a single dataschema (US3). Owner/admin only. + router.delete( + `${basePath}/v1/dataschema/:dataschemaId`, + async (req, res) => deleteDataschema(req, res) + ); + + // Model Management API: single-cube metadata (US4). + // Datasource-scoped: runs behind checkAuthMiddleware (x-hasura-datasource-id + // is mandatory by contract). The /cube/ path segment prevents collision + // with Cube.js's built-in aggregate /api/v1/meta endpoint. + router.get( + `${basePath}/v1/meta/cube/:cubeName`, + checkAuthMiddleware, + async (req, res) => metaSingleCube(req, res, cubejs) + ); + + // Model Management API: diff + rollback (US5). + router.post(`${basePath}/v1/version/diff`, async (req, res) => + versionDiff(req, res) + ); + router.post(`${basePath}/v1/version/rollback`, async (req, res) => + versionRollback(req, res, cubejs) + ); + // Version endpoint is public — returns only the schema-compiler version string router.get(`${basePath}/v1/version`, (req, res) => version(req, res)); diff --git a/services/cubejs/src/routes/metaAll.js b/services/cubejs/src/routes/metaAll.js index cf7df66b..792cee89 100644 --- a/services/cubejs/src/routes/metaAll.js +++ b/services/cubejs/src/routes/metaAll.js @@ -10,8 +10,8 @@ import { provisionUserFromWorkOS, provisionUserFromFraiOS, } from "../utils/dataSourceHelpers.js"; -import defineUserScope from "../utils/defineUserScope.js"; -import { resolvePartitionTeamIds } from "./discover.js"; +import { compileMetaForBranch } from "../utils/metaForBranch.js"; +import { extractCubes, resolvePartitionTeamIds } from "./discover.js"; function getRequestId(req) { return ( @@ -21,8 +21,26 @@ function getRequestId(req) { ); } 
-function summarizeCube(cube, ds, branchId, versionId) { +/** + * Summarize one compiled cube for the aggregate catalog response. + * + * `dataschemaByCubeName` maps each defined cube name to its source + * dataschema row `{id, name}` (the `name` column stores the file name). + * Cube.js v1.6's `metaConfig` does NOT carry `fileName`, so the lookup is by + * cube name rather than file name. + * + * Cubes without a matching dataschema (synthetic / injected cubes) receive + * `null` for both `dataschema_id` and `file_name`. + */ +function summarizeCube(cube, ds, branchId, versionId, dataschemaByCubeName) { const node = cube?.config || cube || {}; + const cubeName = node.name; + const match = + cubeName && dataschemaByCubeName + ? dataschemaByCubeName.get(cubeName) || null + : null; + const dataschemaId = match?.id || null; + const fileName = match?.name || null; return { datasource_id: ds.id, datasource_name: ds.name, @@ -38,10 +56,19 @@ function summarizeCube(cube, ds, branchId, versionId) { dimensions: (node.dimensions || []).map((d) => d.name), segments: (node.segments || []).map((s) => s.name), meta: node.meta || null, + dataschema_id: dataschemaId, + file_name: fileName, }; } -async function metaForDatasource(apiGateway, req, userId, authToken, ds, allMembers) { +async function metaForDatasource( + apiGateway, + req, + userId, + authToken, + ds, + allMembers +) { const activeBranch = ds.branches?.find((b) => b.status === "active") || ds.branches?.[0]; if (!activeBranch) { @@ -49,29 +76,43 @@ async function metaForDatasource(apiGateway, req, userId, authToken, ds, allMemb } const latestVersion = activeBranch.versions?.[0] || null; - const userScope = defineUserScope( - [ds], - allMembers, - ds.id, - activeBranch.id, - latestVersion?.id - ); + // Map cube-name → dataschema row. Cube.js metaConfig output omits file + // provenance, so resolve via the source YAML/JS of each dataschema + // (spec Assumption L171). 
+ const dataschemaByCubeName = new Map(); + for (const schema of latestVersion?.dataschemas || []) { + if (!schema?.id) continue; + const declared = extractCubes(schema); + for (const decl of declared) { + if (decl?.name) { + dataschemaByCubeName.set(decl.name, { + id: schema.id, + name: schema.name, + }); + } + } + } - const securityContext = { authToken, userId, userScope }; - const context = await apiGateway.contextByReq( + const { metaConfig } = await compileMetaForBranch({ + apiGateway, req, - securityContext, - getRequestId(req) - ); - - const compilerApi = await apiGateway.getCompilerApi(context); - let metaConfig = await compilerApi.metaConfig(context, { - requestId: context.requestId, + userId, + authToken, + dataSource: ds, + branchId: activeBranch.id, + versionId: latestVersion?.id, + allMembers, + requestId: getRequestId(req), }); - metaConfig = apiGateway.filterVisibleItemsInMeta(context, metaConfig); const cubes = (metaConfig || []).map((entry) => - summarizeCube(entry, ds, activeBranch.id, latestVersion?.id || null) + summarizeCube( + entry, + ds, + activeBranch.id, + latestVersion?.id || null, + dataschemaByCubeName + ) ); return { @@ -91,7 +132,8 @@ async function metaForDatasource(apiGateway, req, userId, authToken, ds, allMemb * Aggregated cube catalog across every datasource the caller can see. * One request walks all partition-filtered datasources, resolves their * active branch + latest version, compiles each, and returns a summary - * per cube (name, title, description, measures, dimensions, segments, meta). + * per cube (name, title, description, measures, dimensions, segments, meta, + * `dataschema_id`, `file_name`). * * Auth: WorkOS RS256 or FraiOS HS256 Bearer token (same as /discover). 
* @@ -103,7 +145,9 @@ async function metaForDatasource(apiGateway, req, userId, authToken, ds, allMemb * branch_id, version_id, * cubes: [{ datasource_id, ..., name, title, description, public, * measures: string[], dimensions: string[], segments: string[], - * meta: object | null }] + * meta: object | null, + * dataschema_id: uuid | null, + * file_name: string | null }] * } * ], * errors?: [{ datasource_id, error }] @@ -187,3 +231,7 @@ export default async function metaAll(req, res, cubejs) { }); } } + +// Exposed for unit testing and for validation contexts that need to assemble +// their own summaries. +export { summarizeCube, metaForDatasource }; diff --git a/services/cubejs/src/routes/metaSingleCube.js b/services/cubejs/src/routes/metaSingleCube.js new file mode 100644 index 00000000..99527b53 --- /dev/null +++ b/services/cubejs/src/routes/metaSingleCube.js @@ -0,0 +1,111 @@ +import { findUser } from "../utils/dataSourceHelpers.js"; +import { compileMetaForBranch } from "../utils/metaForBranch.js"; +import { ErrorCode } from "../utils/errorCodes.js"; + +/** + * GET /api/v1/meta/cube/:cubeName + * + * Return the compiled metadata envelope for a single cube on the requested + * branch. Runs behind `checkAuthMiddleware`, so `req.securityContext` and + * the `x-hasura-datasource-id` header are already validated. + * + * The resolved branch is the caller's selection (`x-hasura-branch-id`, or + * the active branch) and the endpoint always compiles the latest version of + * that branch (FR-009 + spec Assumption L172). + * + * Respects the access-list visibility filter applied by + * `apiGateway.filterVisibleItemsInMeta` (FR-010). If the caller's access + * list hides the cube, the response is a clean 404 with `code: + * cube_not_found`. 
+ */ +export default async function metaSingleCube(req, res, cubejs) { + const cubeName = req.params?.cubeName; + if (!cubeName || typeof cubeName !== "string") { + return res.status(400).json({ + code: "cube_not_found", + message: "cubeName path parameter is required", + }); + } + + const { authToken, userId, userScope } = req.securityContext || {}; + if (!userScope?.dataSource) { + return res.status(400).json({ + code: "cube_not_found", + message: "userScope not established", + }); + } + + const datasourceId = userScope.dataSource.dataSourceId; + const branchHeader = req.headers["x-hasura-branch-id"] || undefined; + + const user = await findUser({ userId }); + const dataSource = user.dataSources.find((ds) => ds.id === datasourceId); + if (!dataSource) { + return res.status(404).json({ + code: ErrorCode.CUBE_NOT_FOUND, + message: "Datasource not visible", + }); + } + + const apiGateway = cubejs.apiGateway(); + let metaConfig; + let resolvedBranchId; + let resolvedVersionId; + try { + const result = await compileMetaForBranch({ + apiGateway, + req, + userId, + authToken, + dataSource, + branchId: branchHeader, + allMembers: user.members, + }); + metaConfig = result.metaConfig; + resolvedBranchId = result.branchId; + resolvedVersionId = result.versionId; + } catch (err) { + const status = err?.status || 500; + if (status === 404) { + return res.status(404).json({ + code: ErrorCode.CUBE_NOT_FOUND, + message: err?.message || "Branch not found", + }); + } + return res.status(status).json({ + code: ErrorCode.CUBE_NOT_FOUND, + message: err?.message || "Failed to compile branch metadata", + }); + } + + const match = (metaConfig || []).find((entry) => { + const node = entry?.config || entry || {}; + return node?.name === cubeName; + }); + + if (!match) { + return res.status(404).json({ + code: ErrorCode.CUBE_NOT_FOUND, + message: `Cube "${cubeName}" not found on the requested branch`, + branchId: resolvedBranchId, + }); + } + + const node = match?.config || match || {}; + 
return res.json({ + cube: { + name: node.name, + title: node.title || null, + description: node.description || null, + public: node.public !== false, + measures: node.measures || [], + dimensions: node.dimensions || [], + segments: node.segments || [], + hierarchies: node.hierarchies || undefined, + meta: node.meta || null, + }, + datasourceId, + branchId: resolvedBranchId, + versionId: resolvedVersionId, + }); +} diff --git a/services/cubejs/src/routes/refreshCompiler.js b/services/cubejs/src/routes/refreshCompiler.js new file mode 100644 index 00000000..29a4a5d2 --- /dev/null +++ b/services/cubejs/src/routes/refreshCompiler.js @@ -0,0 +1,133 @@ +import { verifyAndProvision } from "../utils/directVerifyAuth.js"; +import { findUser } from "../utils/dataSourceHelpers.js"; +import { resolvePartitionTeamIds } from "./discover.js"; +import { requireOwnerOrAdmin } from "../utils/requireOwnerOrAdmin.js"; +import defineUserScope from "../utils/defineUserScope.js"; +import { invalidateCompilerForBranch } from "../utils/compilerCacheInvalidator.js"; +import { ErrorCode } from "../utils/errorCodes.js"; + +function respondError(res, status, code, message) { + return res.status(status).json({ code, message }); +} + +/** + * POST /api/v1/internal/refresh-compiler + * + * Invalidate compiler-cache entries scoped to the caller's branch so that + * subsequent metadata/query requests recompile from the current dataschemas + * (FR-004). Asynchronous — the eviction happens in-memory, the response + * returns as soon as keys are removed, recompilation happens on the next + * downstream request (FR-004a). Idempotent per (branch, schemaVersion) + * pair (FR-005). + * + * Owner/admin required (FR-015 + research.md §R14): refresh affects every + * other caller sharing the branch, so it carries the same authorisation bar + * as delete and rollback. + * + * Direct-verify auth — handler owns JWT verification; no + * x-hasura-datasource-id header required. 
+ */ +export default async function refreshCompiler(req, res, cubejs) { + const verified = await verifyAndProvision(req); + if (verified.error) { + return respondError( + res, + verified.error.status, + verified.error.code, + verified.error.message + ); + } + const { payload, userId } = verified; + + const { branchId } = req.body || {}; + if (!branchId || typeof branchId !== "string") { + return respondError( + res, + 400, + "refresh_invalid_request", + "branchId is required" + ); + } + + const user = await findUser({ userId }); + if (!user?.dataSources?.length) { + return respondError( + res, + 404, + ErrorCode.REFRESH_BRANCH_NOT_VISIBLE, + "No datasources visible to this caller" + ); + } + + const dataSource = user.dataSources.find((ds) => + (ds.branches || []).some((b) => b.id === branchId) + ); + if (!dataSource) { + return respondError( + res, + 404, + ErrorCode.REFRESH_BRANCH_NOT_VISIBLE, + "Branch not found or not visible to this caller" + ); + } + + const partitionTeamIds = resolvePartitionTeamIds( + user.members, + payload.partition + ); + if (partitionTeamIds && !partitionTeamIds.has(dataSource.team_id)) { + return respondError( + res, + 403, + ErrorCode.REFRESH_UNAUTHORIZED, + "Caller's partition does not match the branch's team" + ); + } + + if (!requireOwnerOrAdmin(user, dataSource.team_id)) { + return respondError( + res, + 403, + ErrorCode.REFRESH_UNAUTHORIZED, + "Owner or admin role required for refresh" + ); + } + + let schemaVersion; + try { + const scope = defineUserScope( + user.dataSources, + user.members, + dataSource.id, + branchId, + undefined + ); + schemaVersion = scope?.dataSource?.schemaVersion; + } catch (err) { + const status = err?.status || 500; + return respondError( + res, + status, + ErrorCode.REFRESH_BRANCH_NOT_VISIBLE, + err?.message || "Unable to resolve branch scope" + ); + } + + const evicted = invalidateCompilerForBranch(cubejs, schemaVersion); + + // Non-durable observability log (FR-004 refresh is NOT a persistent mutation 
+ // and does not produce an audit_logs row — SC-007 scope is delete + rollback). + console.log( + JSON.stringify({ + level: "info", + action: "refresh_compiler", + userId, + branchId, + evicted, + schemaVersion, + ts: new Date().toISOString(), + }) + ); + + return res.json({ evicted, schemaVersion }); +} diff --git a/services/cubejs/src/routes/validateInBranch.js b/services/cubejs/src/routes/validateInBranch.js new file mode 100644 index 00000000..cf2f9e21 --- /dev/null +++ b/services/cubejs/src/routes/validateInBranch.js @@ -0,0 +1,374 @@ +import { prepareCompiler } from "@cubejs-backend/schema-compiler"; + +import { verifyAndProvision } from "../utils/directVerifyAuth.js"; +import { + findUser, + findDataSchemas, +} from "../utils/dataSourceHelpers.js"; +import { fetchGraphQL } from "../utils/graphql.js"; +import { mintHasuraToken } from "../utils/mintHasuraToken.js"; +import { mintedTokenCache } from "../utils/mintedTokenCache.js"; +import { requireOwnerOrAdmin } from "../utils/requireOwnerOrAdmin.js"; +import { scanCrossCubeReferences } from "../utils/referenceScanner.js"; +import { resolvePartitionTeamIds } from "./discover.js"; +import { ErrorCode } from "../utils/errorCodes.js"; +import { extractCubes } from "./discover.js"; + +const BRANCH_DATASOURCE_QUERY = ` + query BranchDatasource($branchId: uuid!) { + branches_by_pk(id: $branchId) { + id + status + datasource { + id + team_id + } + } + } +`; + +class InMemorySchemaFileRepository { + constructor(files) { + this.files = files; + } + localPath() { + return "/"; + } + async dataSchemaFiles() { + return this.files; + } +} + +function mapCompilerError(err, code) { + const mapped = { + severity: "error", + message: err.plainMessage || err.message || String(err), + fileName: err.fileName || null, + startLine: err.lineNumber != null ? Number(err.lineNumber) : null, + startColumn: err.position != null ? 
Number(err.position) : null, + endLine: null, + endColumn: null, + }; + if (code) mapped.code = code; + return mapped; +} + +function mapSyntaxWarning(warn) { + const loc = warn.loc || {}; + const start = loc.start || {}; + const end = loc.end || {}; + return { + severity: "warning", + message: warn.plainMessage || warn.message || String(warn), + fileName: null, + startLine: start.line != null ? Number(start.line) : null, + startColumn: start.column != null ? Number(start.column) : null, + endLine: end.line != null ? Number(end.line) : null, + endColumn: end.column != null ? Number(end.column) : null, + }; +} + +function badRequest(res, code, message) { + return res.status(400).json({ code, message }); +} + +function respondJson(res, status, body) { + return res.status(status).json(body); +} + +async function ensureHasuraTokenForUser(userId) { + let hasuraToken = mintedTokenCache.get(userId); + if (hasuraToken) return hasuraToken; + hasuraToken = await mintHasuraToken(userId); + const parts = hasuraToken.split("."); + const payload = JSON.parse( + Buffer.from(parts[1], "base64url").toString() + ); + mintedTokenCache.set(userId, hasuraToken, payload.exp); + return hasuraToken; +} + +/** + * POST /api/v1/validate-in-branch + * + * Validates a draft dataschema file against the branch's currently-deployed + * cubes. Three modes (FR-002): + * - append → add draft alongside deployed files + * - replace → swap target dataschema's code with the draft + * - preview-delete → compile the branch without the target file + * + * Direct-verify auth (FR-015): team membership is sufficient for `append`; + * `replace` and `preview-delete` require owner or admin on the datasource's + * team (both modes signal intent to mutate). 
/**
 * POST /api/v1/validate-in-branch (US1)
 *
 * Compiles the branch's dataschema set in-memory and reports errors/warnings
 * without persisting anything. Three modes:
 *   - `append`:         validate `draft` as a new file on the branch
 *   - `replace`:        validate the branch with `targetDataschemaId`'s
 *                       content swapped for `draft.content`
 *   - `preview-delete`: validate the branch with `targetDataschemaId`
 *                       removed; on compile failure, attach a structured
 *                       `blockingReferences` list (FR-008)
 *
 * @param {object} req - Express-style request; body: {branchId, mode, draft?, targetDataschemaId?}
 * @param {object} res - Express-style response
 */
export default async function validateInBranch(req, res) {
  const verified = await verifyAndProvision(req);
  if (verified.error) {
    return respondJson(res, verified.error.status, {
      code: verified.error.code,
      message: verified.error.message,
    });
  }
  const { payload, userId } = verified;

  const { branchId, mode, draft, targetDataschemaId } = req.body || {};

  if (!branchId || typeof branchId !== "string") {
    return badRequest(
      res,
      ErrorCode.VALIDATE_INVALID_MODE,
      "branchId is required"
    );
  }

  // Mode + mode-conditional field validation, collapsed into a single pure
  // helper (previously a duplicated if/else ladder). Messages are unchanged.
  const fieldError = validateModeFields(mode, draft, targetDataschemaId);
  if (fieldError) {
    return badRequest(res, ErrorCode.VALIDATE_INVALID_MODE, fieldError);
  }

  // Resolve branch → datasource → team_id (handler runs before the GraphQL
  // user-role query because the minted Hasura token we need for findDataSchemas
  // gets authorized against the branch's existing permissions).
  let branchRow;
  try {
    const branchRes = await fetchGraphQL(BRANCH_DATASOURCE_QUERY, { branchId });
    branchRow = branchRes?.data?.branches_by_pk;
  } catch (err) {
    return respondJson(res, 503, {
      code: "hasura_unavailable",
      message: err?.message || "Hasura unavailable",
    });
  }

  if (!branchRow) {
    return respondJson(res, 404, {
      code: ErrorCode.VALIDATE_TARGET_NOT_FOUND,
      message: "Branch not found",
    });
  }

  const teamId = branchRow.datasource?.team_id;
  const user = await findUser({ userId });

  // Partition gate (FR-015).
  const partitionTeamIds = resolvePartitionTeamIds(
    user.members,
    payload.partition
  );
  if (partitionTeamIds && !partitionTeamIds.has(teamId)) {
    return respondJson(res, 403, {
      code: ErrorCode.DELETE_BLOCKED_AUTHORIZATION,
      message: "Caller's partition does not match the branch's team",
    });
  }

  // Owner/admin gate for non-append modes.
  if (mode !== "append" && !requireOwnerOrAdmin(user, teamId)) {
    return respondJson(res, 403, {
      code: ErrorCode.DELETE_BLOCKED_AUTHORIZATION,
      message: "Owner or admin role required for replace / preview-delete",
    });
  }

  // Load existing dataschemas via a minted Hasura token (findDataSchemas uses
  // the user-role select permission).
  let existing;
  try {
    const hasuraToken = await ensureHasuraTokenForUser(userId);
    existing = await findDataSchemas({ branchId, authToken: hasuraToken });
  } catch (err) {
    return respondJson(res, 503, {
      code: "hasura_unavailable",
      message: err?.message || "Hasura unavailable",
    });
  }

  // Assemble the compile repository per mode.
  const files = [];
  let targetExisting = null;

  for (const row of existing) {
    if (mode === "replace" && row.id === targetDataschemaId) {
      targetExisting = row;
      files.push({ fileName: row.name, content: draft.content });
      continue;
    }
    if (mode === "preview-delete" && row.id === targetDataschemaId) {
      targetExisting = row;
      continue; // excluded from compile set
    }
    files.push({ fileName: row.name, content: row.code });
  }

  // replace and preview-delete both require the target row to be attached to
  // this branch (previously two byte-identical 404 checks).
  if (mode !== "append" && !targetExisting) {
    return respondJson(res, 404, {
      code: ErrorCode.VALIDATE_TARGET_NOT_FOUND,
      message: "targetDataschemaId is not attached to the specified branch",
    });
  }

  if (mode === "append") {
    if (files.some((f) => f.fileName === draft.fileName)) {
      // File name collision — surface as compile error.
      return res.json({
        valid: false,
        errors: [
          {
            severity: "error",
            message: `A dataschema named "${draft.fileName}" already exists on this branch`,
            fileName: draft.fileName,
            startLine: null,
            startColumn: null,
            endLine: null,
            endColumn: null,
            code: ErrorCode.VALIDATE_INVALID_MODE,
          },
        ],
        warnings: [],
      });
    }
    files.push({ fileName: draft.fileName, content: draft.content });
  }

  // Compile the assembled set with a sandboxed compiler (no Node require).
  try {
    const repo = new InMemorySchemaFileRepository(files);
    const { compiler } = prepareCompiler(repo, {
      allowNodeRequire: false,
      standalone: true,
    });

    let compileError = null;
    try {
      await compiler.compile();
    } catch (err) {
      compileError = err;
    }

    const errorsReport = compiler.errorsReport;
    const rawErrors = errorsReport ? errorsReport.getErrors() : [];
    const rawWarnings = errorsReport ? errorsReport.getWarnings() : [];

    // Prefer the structured per-file errors; fall back to the thrown compile
    // error only when the report captured nothing.
    let errors;
    if (rawErrors.length > 0) {
      errors = rawErrors.map((e) =>
        mapCompilerError(e, ErrorCode.VALIDATE_UNRESOLVED_REFERENCE)
      );
    } else if (compileError) {
      errors = [
        {
          severity: "error",
          message: compileError.message || String(compileError),
          fileName: null,
          startLine: null,
          startColumn: null,
          endLine: null,
          endColumn: null,
          code: ErrorCode.VALIDATE_UNRESOLVED_REFERENCE,
        },
      ];
    } else {
      errors = [];
    }

    const warnings = rawWarnings.map(mapSyntaxWarning);
    const result = {
      valid: errors.length === 0,
      errors,
      warnings,
    };

    if (mode === "preview-delete" && result.valid === false) {
      // Attach a structured blockingReferences list for the caller so the
      // delete endpoint can consume it.
      const targetCubeNames = extractCubes(targetExisting).map((c) => c.name);
      const otherCubes = existing
        .filter((row) => row.id !== targetDataschemaId)
        .flatMap((row) =>
          extractCubes(row).map((c) => ({
            cubeName: c.name,
            fileName: row.name,
            code: row.code,
          }))
        );
      const hits = [];
      for (const targetName of targetCubeNames) {
        for (const ref of scanCrossCubeReferences(targetName, otherCubes)) {
          hits.push(ref);
        }
      }
      if (hits.length > 0) {
        result.blockingReferences = hits;
      }
    }

    return res.json(result);
  } catch (err) {
    return respondJson(res, 500, {
      code: "validate_error",
      message: err?.message || "Validation failed",
    });
  }
}

/**
 * Validate mode + mode-conditional fields for validate-in-branch.
 *
 * @param {string} mode - "append" | "replace" | "preview-delete"
 * @param {object|undefined} draft
 * @param {string|undefined} targetDataschemaId
 * @returns {string|null} the 400-response message, or null when acceptable
 */
function validateModeFields(mode, draft, targetDataschemaId) {
  if (mode !== "append" && mode !== "replace" && mode !== "preview-delete") {
    return "mode must be append | replace | preview-delete";
  }
  if (mode === "append") {
    if (!draft || typeof draft !== "object") {
      return "draft is required when mode is append";
    }
    if (targetDataschemaId) {
      return "targetDataschemaId must not be set when mode is append";
    }
    return null;
  }
  if (mode === "replace") {
    if (!draft || typeof draft !== "object") {
      return "draft is required when mode is replace";
    }
    if (!targetDataschemaId) {
      return "targetDataschemaId is required when mode is replace";
    }
    return null;
  }
  // preview-delete
  if (draft) {
    return "draft must not be set when mode is preview-delete";
  }
  if (!targetDataschemaId) {
    return "targetDataschemaId is required when mode is preview-delete";
  }
  return null;
}
/**
 * Minimal JSON error responder shared by this handler's failure paths.
 * @returns the Express response for early-return chaining
 */
function respondError(res, status, code, message) {
  return res.status(status).json({ code, message });
}

/**
 * POST /api/v1/version/diff
 *
 * Body: `{fromVersionId, toVersionId}`. Both versions must belong to the same
 * branch (FR-012). Returns `VersionDiffResponse` per data-model.md §2.9.
 *
 * Fix: `findUser` is now wrapped like every other Hasura round-trip in this
 * handler — previously a Hasura outage on that one call escaped as an
 * unhandled rejection instead of the 503 the sibling calls return.
 */
export default async function versionDiff(req, res) {
  const verified = await verifyAndProvision(req);
  if (verified.error) {
    return respondError(
      res,
      verified.error.status,
      verified.error.code,
      verified.error.message
    );
  }
  const { payload, userId } = verified;

  const { fromVersionId, toVersionId } = req.body || {};
  if (!fromVersionId || !toVersionId) {
    return respondError(
      res,
      400,
      ErrorCode.DIFF_INVALID_REQUEST,
      "fromVersionId and toVersionId are required"
    );
  }
  if (fromVersionId === toVersionId) {
    return respondError(
      res,
      400,
      ErrorCode.DIFF_INVALID_REQUEST,
      "fromVersionId and toVersionId must differ"
    );
  }

  // Resolve both versions' branch metadata in parallel.
  let fromMeta;
  let toMeta;
  try {
    [fromMeta, toMeta] = await Promise.all([
      findVersionBranch({ versionId: fromVersionId }),
      findVersionBranch({ versionId: toVersionId }),
    ]);
  } catch (err) {
    return respondError(
      res,
      503,
      "hasura_unavailable",
      err?.message || "Hasura unavailable"
    );
  }

  if (!fromMeta || !toMeta) {
    return respondError(
      res,
      404,
      ErrorCode.DIFF_INVALID_REQUEST,
      "One or both versions not found"
    );
  }

  // FR-012: cross-branch diffs are rejected outright.
  if (fromMeta.branchId !== toMeta.branchId) {
    return respondError(
      res,
      400,
      ErrorCode.DIFF_CROSS_BRANCH,
      "Diff requires both versions on the same branch"
    );
  }

  let user;
  try {
    user = await findUser({ userId });
  } catch (err) {
    return respondError(
      res,
      503,
      "hasura_unavailable",
      err?.message || "Hasura unavailable"
    );
  }

  // Partition gate (FR-015) against the (shared) branch's team.
  const partitionTeamIds = resolvePartitionTeamIds(
    user.members,
    payload.partition
  );
  if (partitionTeamIds && !partitionTeamIds.has(toMeta.teamId)) {
    return respondError(
      res,
      403,
      ErrorCode.DIFF_INVALID_REQUEST,
      "Caller's partition does not match the branch's team"
    );
  }

  // Load both versions' dataschema rows in parallel.
  let fromRows;
  let toRows;
  try {
    [fromRows, toRows] = await Promise.all([
      findVersionDataschemas({ versionId: fromVersionId }),
      findVersionDataschemas({ versionId: toVersionId }),
    ]);
  } catch (err) {
    return respondError(
      res,
      503,
      "hasura_unavailable",
      err?.message || "Hasura unavailable"
    );
  }

  const diff = diffVersions({
    fromDataschemas: fromRows,
    toDataschemas: toRows,
  });

  return res.json({
    branchId: toMeta.branchId,
    fromVersionId,
    toVersionId,
    ...diff,
  });
}
/**
 * Return a Hasura token for `userId`, minting and caching one on a miss.
 *
 * The cache TTL comes from the freshly minted JWT's `exp` claim, decoded
 * from the payload segment (base64url); the signature is not re-verified
 * here — the token was just produced by our own minting helper.
 *
 * @param {string} userId
 * @returns {Promise<string>} a Hasura-ready JWT
 */
async function ensureHasuraTokenForUser(userId) {
  const cached = mintedTokenCache.get(userId);
  if (cached) {
    return cached;
  }
  const minted = await mintHasuraToken(userId);
  const [, payloadSegment] = minted.split(".");
  const claims = JSON.parse(
    Buffer.from(payloadSegment, "base64url").toString()
  );
  mintedTokenCache.set(userId, minted, claims.exp);
  return minted;
}
/**
 * Write a `version_rollback` failure audit row (FR-016). Best-effort: the
 * shared writer retries internally and logs to stderr on exhaustion — it
 * never throws, so callers can `await` it on any failure branch.
 */
function auditRollbackFailure({
  userId,
  datasourceId,
  branchId,
  toVersionId,
  errorCode,
  detail = null,
}) {
  return writeAuditLog({
    action: "version_rollback",
    userId,
    datasourceId,
    branchId,
    targetId: toVersionId,
    outcome: "failure",
    errorCode,
    payload: detail,
  });
}

/**
 * POST /api/v1/version/rollback
 *
 * Body: `{branchId, toVersionId}`. Creates a new version on `branchId` whose
 * dataschemas are byte-identical clones of `toVersionId`'s dataschemas
 * (FR-013). The trigger `versions_flip_is_current_trg` switches the new row
 * to `is_current=true` atomically. Owner/admin required (FR-015).
 *
 * Every failure branch writes an audit row with `outcome='failure'` —
 * including the token-mint and Hasura-outage 503 paths (previously those two
 * returned without one, contradicting FR-016); success is captured by the
 * `version_rollback_audit` event trigger (FR-016).
 *
 * Note: source-column drift check (spec Edge Case L115) is not implemented
 * in this first cut — it requires a driver round-trip per cube, and the
 * insert's permission / constraint errors surface via the Hasura path.
 * This is tracked as a follow-up; handler intentionally returns the
 * Hasura-mapped failure code rather than silently succeeding.
 */
export default async function versionRollback(req, res, cubejs) {
  const verified = await verifyAndProvision(req);
  if (verified.error) {
    return respondError(
      res,
      verified.error.status,
      verified.error.code,
      verified.error.message
    );
  }
  const { payload, userId } = verified;

  const { branchId, toVersionId } = req.body || {};
  if (!branchId || !toVersionId) {
    return respondError(
      res,
      400,
      ErrorCode.ROLLBACK_INVALID_REQUEST,
      "branchId and toVersionId are required"
    );
  }

  // Consistency fix: versionDiff wraps the same lookup; previously a Hasura
  // outage here threw out of the handler instead of returning the 503.
  let meta;
  try {
    meta = await findVersionBranch({ versionId: toVersionId });
  } catch (err) {
    return respondError(
      res,
      503,
      "hasura_unavailable",
      err?.message || "Hasura unavailable"
    );
  }
  if (!meta) {
    // No datasource/branch is resolvable here, so no audit row is written.
    return respondError(
      res,
      404,
      ErrorCode.ROLLBACK_VERSION_NOT_ON_BRANCH,
      "Target version not found"
    );
  }
  if (meta.branchId !== branchId) {
    await auditRollbackFailure({
      userId,
      datasourceId: meta.datasourceId,
      branchId,
      toVersionId,
      errorCode: ErrorCode.ROLLBACK_VERSION_NOT_ON_BRANCH,
      detail: { resolvedBranchId: meta.branchId },
    });
    return respondError(
      res,
      400,
      ErrorCode.ROLLBACK_VERSION_NOT_ON_BRANCH,
      "toVersionId does not belong to the requested branch"
    );
  }

  const user = await findUser({ userId });

  // Partition gate (FR-015).
  const partitionTeamIds = resolvePartitionTeamIds(
    user.members,
    payload.partition
  );
  if (partitionTeamIds && !partitionTeamIds.has(meta.teamId)) {
    await auditRollbackFailure({
      userId,
      datasourceId: meta.datasourceId,
      branchId,
      toVersionId,
      errorCode: ErrorCode.ROLLBACK_BLOCKED_AUTHORIZATION,
      detail: { reason: "partition_mismatch" },
    });
    return respondError(
      res,
      403,
      ErrorCode.ROLLBACK_BLOCKED_AUTHORIZATION,
      "Caller's partition does not match the branch's team"
    );
  }

  // Role gate (FR-015).
  if (!requireOwnerOrAdmin(user, meta.teamId)) {
    await auditRollbackFailure({
      userId,
      datasourceId: meta.datasourceId,
      branchId,
      toVersionId,
      errorCode: ErrorCode.ROLLBACK_BLOCKED_AUTHORIZATION,
      detail: { reason: "insufficient_role" },
    });
    return respondError(
      res,
      403,
      ErrorCode.ROLLBACK_BLOCKED_AUTHORIZATION,
      "Owner or admin role required"
    );
  }

  let hasuraToken;
  try {
    hasuraToken = await ensureHasuraTokenForUser(userId);
  } catch {
    // FR-016: this failure branch previously returned without an audit row.
    await auditRollbackFailure({
      userId,
      datasourceId: meta.datasourceId,
      branchId,
      toVersionId,
      errorCode: "auth_unavailable",
      detail: { reason: "token_mint_failed" },
    });
    return respondError(
      res,
      503,
      "auth_unavailable",
      "Unable to mint Hasura token"
    );
  }

  let result;
  try {
    result = await rollbackHelper({
      branchId,
      toVersionId,
      userId,
      datasourceId: meta.datasourceId,
      authToken: hasuraToken,
    });
  } catch (err) {
    // FR-016: this failure branch previously returned without an audit row.
    // The writer is itself best-effort, so a full Hasura outage still emits
    // the structured stderr observation line at minimum.
    await auditRollbackFailure({
      userId,
      datasourceId: meta.datasourceId,
      branchId,
      toVersionId,
      errorCode: "hasura_unavailable",
      detail: { message: err?.message || "Hasura unavailable" },
    });
    return respondError(
      res,
      503,
      "hasura_unavailable",
      err?.message || "Hasura unavailable"
    );
  }

  if (result?.errors) {
    const mapped = mapHasuraErrorCode(result.errors, { action: "rollback" });
    const errorCode = mapped || "hasura_rejected";
    await auditRollbackFailure({
      userId,
      datasourceId: meta.datasourceId,
      branchId,
      toVersionId,
      errorCode,
      detail: { errors: result.errors },
    });
    if (mapped === ErrorCode.ROLLBACK_BLOCKED_AUTHORIZATION) {
      return respondError(
        res,
        403,
        mapped,
        "Hasura rejected the rollback (permission-error)"
      );
    }
    return respondError(
      res,
      503,
      "hasura_unavailable",
      "Hasura rejected the rollback"
    );
  }

  return res.json({
    newVersionId: result.newVersionId,
    clonedDataschemaCount: result.clonedDataschemaCount,
  });
}
+ const originalEndpoint = process.env.HASURA_ENDPOINT; + process.env.HASURA_ENDPOINT = + "http://127.0.0.1:1/unreachable-audit-writer-test"; + try { + const res = await writeAuditLog({ + action: "version_rollback", + userId: "00000000-0000-4000-8000-000000000001", + targetId: "00000000-0000-4000-8000-000000000002", + outcome: "failure", + errorCode: "rollback_version_not_on_branch", + }); + assert.deepEqual(res, { ok: false }); + + const writeFailedLine = captured + .map((m) => { + try { + return JSON.parse(m); + } catch { + return null; + } + }) + .find((m) => m && m.event === "audit_write_failed"); + assert.ok(writeFailedLine, "expected audit_write_failed stderr line"); + assert.equal(writeFailedLine.action, "version_rollback"); + assert.equal(writeFailedLine.outcome, "failure"); + } finally { + if (originalEndpoint == null) { + delete process.env.HASURA_ENDPOINT; + } else { + process.env.HASURA_ENDPOINT = originalEndpoint; + } + } + }); +}); diff --git a/services/cubejs/src/utils/__tests__/compilerCacheInvalidator.test.js b/services/cubejs/src/utils/__tests__/compilerCacheInvalidator.test.js new file mode 100644 index 00000000..d8b4b232 --- /dev/null +++ b/services/cubejs/src/utils/__tests__/compilerCacheInvalidator.test.js @@ -0,0 +1,82 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +import { invalidateCompilerForBranch } from "../compilerCacheInvalidator.js"; + +function makeFakeLRU(initialEntries = []) { + const map = new Map(initialEntries); + return { + keys: () => map.keys(), + delete: (key) => map.delete(key), + size: () => map.size, + has: (key) => map.has(key), + _backing: map, + }; +} + +describe("invalidateCompilerForBranch", () => { + it("evicts only entries whose appId suffix matches the given schemaVersion", () => { + const cache = makeFakeLRU([ + ["CUBEJS_APP_dsvA_schA}", {}], + ["CUBEJS_APP_dsvB_schA}", {}], + ["CUBEJS_APP_dsvC_schB}", {}], + ["CUBEJS_APP_dsvA_schC}", {}], + ]); + + const evicted = 
invalidateCompilerForBranch( + { compilerCache: cache }, + "schA" + ); + + assert.equal(evicted, 2); + assert.equal(cache.has("CUBEJS_APP_dsvA_schA}"), false); + assert.equal(cache.has("CUBEJS_APP_dsvB_schA}"), false); + assert.equal(cache.has("CUBEJS_APP_dsvC_schB}"), true); + assert.equal(cache.has("CUBEJS_APP_dsvA_schC}"), true); + }); + + it("returns 0 and no-ops on an empty cache", () => { + const cache = makeFakeLRU(); + const evicted = invalidateCompilerForBranch( + { compilerCache: cache }, + "anyhash" + ); + assert.equal(evicted, 0); + }); + + it("returns 0 gracefully when compilerCache is missing or unsupported", () => { + assert.equal(invalidateCompilerForBranch({}, "h"), 0); + assert.equal(invalidateCompilerForBranch({ compilerCache: null }, "h"), 0); + assert.equal( + invalidateCompilerForBranch({ compilerCache: { keys: null } }, "h"), + 0 + ); + }); + + it("is idempotent — second call with no intervening inserts returns 0", () => { + const cache = makeFakeLRU([ + ["CUBEJS_APP_dsvA_schX}", {}], + ["CUBEJS_APP_dsvB_schX}", {}], + ]); + const cubejs = { compilerCache: cache }; + + const first = invalidateCompilerForBranch(cubejs, "schX"); + const second = invalidateCompilerForBranch(cubejs, "schX"); + + assert.equal(first, 2); + assert.equal(second, 0); + }); + + it("ignores non-string keys without throwing", () => { + const cache = makeFakeLRU([ + [Symbol("weird"), {}], + [42, {}], + ["CUBEJS_APP_x_schK}", {}], + ]); + const evicted = invalidateCompilerForBranch( + { compilerCache: cache }, + "schK" + ); + assert.equal(evicted, 1); + }); +}); diff --git a/services/cubejs/src/utils/__tests__/errorCodes.test.js b/services/cubejs/src/utils/__tests__/errorCodes.test.js new file mode 100644 index 00000000..b80eda3f --- /dev/null +++ b/services/cubejs/src/utils/__tests__/errorCodes.test.js @@ -0,0 +1,35 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +import { + ErrorCode, + ErrorCodeSet, + isKnownErrorCode, +} from 
// Contract tests for the canonical Model Management API error-code enum
// (FR-017): immutability, Set/enum agreement, and presence of the codes the
// R13 contract and the rollback authorization paths depend on.
describe("ErrorCode enum", () => {
  it("freezes the enum to prevent runtime mutation", () => {
    assert.ok(Object.isFrozen(ErrorCode));
    assert.ok(Object.isFrozen(ErrorCodeSet));
  });

  it("exposes a Set of every code for test discovery", () => {
    // The Set must mirror the enum's values exactly — no extras, no gaps.
    assert.deepEqual(new Set(Object.values(ErrorCode)), ErrorCodeSet);
  });

  it("recognises each declared code", () => {
    for (const code of Object.values(ErrorCode)) {
      assert.equal(isKnownErrorCode(code), true, `known: ${code}`);
    }
    assert.equal(isKnownErrorCode("nope"), false);
  });

  it("includes refresh_unauthorized (FR-017 / R13 contract enum)", () => {
    assert.ok(ErrorCodeSet.has("refresh_unauthorized"));
  });

  it("includes rollback_blocked_authorization for partition/role failures on rollback", () => {
    assert.ok(ErrorCodeSet.has("rollback_blocked_authorization"));
  });
});
ErrorCode.DELETE_BLOCKED_AUTHORIZATION); + }); + + it("permission-error maps to rollback_blocked_authorization for rollback action", () => { + const code = mapHasuraErrorCode( + [{ extensions: { code: "permission-error" } }], + { action: "rollback" } + ); + assert.equal(code, ErrorCode.ROLLBACK_BLOCKED_AUTHORIZATION); + }); + + it("not-exists maps to cube_not_found for meta action", () => { + const code = mapHasuraErrorCode( + [{ extensions: { code: "not-exists" } }], + { action: "meta" } + ); + assert.equal(code, ErrorCode.CUBE_NOT_FOUND); + }); + + it("constraint-violation maps to delete_blocked_by_references for delete", () => { + const code = mapHasuraErrorCode( + [{ extensions: { code: "constraint-violation" } }], + { action: "delete" } + ); + assert.equal(code, ErrorCode.DELETE_BLOCKED_BY_REFERENCES); + }); + + it("returns null for unknown extensions.code", () => { + const code = mapHasuraErrorCode( + [{ extensions: { code: "totally-unknown" } }], + { action: "delete" } + ); + assert.equal(code, null); + }); +}); diff --git a/services/cubejs/src/utils/__tests__/referenceScanner.test.js b/services/cubejs/src/utils/__tests__/referenceScanner.test.js new file mode 100644 index 00000000..3c884c0e --- /dev/null +++ b/services/cubejs/src/utils/__tests__/referenceScanner.test.js @@ -0,0 +1,131 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +import { scanCrossCubeReferences } from "../referenceScanner.js"; + +function mkCube(cubeName, fileName, code) { + return { cubeName, fileName, code }; +} + +describe("scanCrossCubeReferences — FR-008 seven reference kinds", () => { + it("detects `joins` by name reference", () => { + const others = [ + mkCube( + "order_items", + "order_items.yml", + `cubes:\n - name: order_items\n joins:\n - name: orders\n sql: "{CUBE}.order_id = {orders}.id"\n relationship: many_to_one\n` + ), + ]; + const hits = scanCrossCubeReferences("orders", others); + assert.ok(hits.some((h) => h.referenceKind === 
"joins")); + assert.equal(hits[0].referringCube, "order_items"); + assert.equal(hits[0].file, "order_items.yml"); + }); + + it("detects `extends` chain", () => { + const others = [ + mkCube( + "derived", + "derived.yml", + `cubes:\n - name: derived\n extends: orders\n sql_table: derived\n` + ), + ]; + const hits = scanCrossCubeReferences("orders", others); + assert.ok(hits.some((h) => h.referenceKind === "extends")); + }); + + it("detects measure/dimension `formula` reference via `{cube}.field`", () => { + const others = [ + mkCube( + "reports", + "reports.yml", + `cubes:\n - name: reports\n measures:\n - name: derived\n type: number\n sql: "{orders.revenue}"\n` + ), + ]; + const hits = scanCrossCubeReferences("orders", others); + assert.ok(hits.some((h) => h.referenceKind === "formula")); + }); + + it("detects `segment` inheritance via segment sql referencing another cube", () => { + const others = [ + mkCube( + "segs", + "segs.yml", + `cubes:\n - name: segs\n segments:\n - name: active\n sql: "{orders}.status = 'active'"\n` + ), + ]; + const hits = scanCrossCubeReferences("orders", others); + assert.ok(hits.some((h) => h.referenceKind === "segment")); + }); + + it("detects `pre_aggregation` rollup references", () => { + const others = [ + mkCube( + "rollup_cube", + "rollup.yml", + `cubes:\n - name: rollup_cube\n pre_aggregations:\n - name: daily\n measures: [orders.count, orders.revenue]\n time_dimension: orders.created_at\n granularity: day\n` + ), + ]; + const hits = scanCrossCubeReferences("orders", others); + assert.ok(hits.some((h) => h.referenceKind === "pre_aggregation")); + }); + + it("detects `filter_params` cross-cube reference", () => { + const others = [ + mkCube( + "other", + "other.yml", + `cubes:\n - name: other\n measures:\n - name: count\n type: count\n sql: "CASE WHEN FILTER_PARAMS.orders.created_at.filter('created_at') THEN 1 ELSE 0 END"\n` + ), + ]; + const hits = scanCrossCubeReferences("orders", others); + assert.ok(hits.some((h) => 
h.referenceKind === "filter_params")); + }); + + it("detects `sub_query` reference via sub_query + cross-cube sql", () => { + const others = [ + mkCube( + "stats", + "stats.yml", + `cubes:\n - name: stats\n dimensions:\n - name: orders_total\n sub_query: true\n sql: "{orders.revenue}"\n type: number\n` + ), + ]; + const hits = scanCrossCubeReferences("orders", others); + assert.ok(hits.some((h) => h.referenceKind === "sub_query")); + }); + + it("returns empty array when no cube references the target", () => { + const others = [ + mkCube( + "lonely", + "lonely.yml", + `cubes:\n - name: lonely\n sql_table: lonely\n` + ), + ]; + const hits = scanCrossCubeReferences("orders", others); + assert.deepEqual(hits, []); + }); + + it("does not match a cube on itself (self-reference guard)", () => { + const others = [ + mkCube( + "orders", + "orders.yml", + `cubes:\n - name: orders\n sql_table: public.orders\n measures:\n - name: count\n type: count\n` + ), + ]; + const hits = scanCrossCubeReferences("orders", others); + assert.deepEqual(hits, []); + }); + + it("emits line numbers (1-based) for each hit", () => { + const code = `cubes:\n - name: other\n joins:\n - name: orders\n sql: "{CUBE}.id = {orders}.id"\n`; + const hits = scanCrossCubeReferences("orders", [ + mkCube("other", "other.yml", code), + ]); + assert.ok(hits.length >= 1); + for (const h of hits) { + assert.ok(typeof h.line === "number" && h.line >= 1); + } + }); +}); diff --git a/services/cubejs/src/utils/__tests__/requireOwnerOrAdmin.test.js b/services/cubejs/src/utils/__tests__/requireOwnerOrAdmin.test.js new file mode 100644 index 00000000..3ac8f2b3 --- /dev/null +++ b/services/cubejs/src/utils/__tests__/requireOwnerOrAdmin.test.js @@ -0,0 +1,54 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +import { requireOwnerOrAdmin } from "../requireOwnerOrAdmin.js"; + +describe("requireOwnerOrAdmin", () => { + it("returns true for an owner on the target team", () => { + const 
describe("requireOwnerOrAdmin", () => {
  // Fixture builder: a user holding exactly one team_role on one team.
  const userWithRole = (teamId, role) => ({
    members: [{ team_id: teamId, member_roles: [{ team_role: role }] }],
  });

  it("returns true for an owner on the target team", () => {
    assert.equal(requireOwnerOrAdmin(userWithRole("t1", "owner"), "t1"), true);
  });

  it("returns true for an admin on the target team", () => {
    assert.equal(requireOwnerOrAdmin(userWithRole("t1", "admin"), "t1"), true);
  });

  it("returns false for a plain member", () => {
    assert.equal(
      requireOwnerOrAdmin(userWithRole("t1", "member"), "t1"),
      false
    );
  });

  it("returns false when the caller has admin on a different team", () => {
    assert.equal(
      requireOwnerOrAdmin(userWithRole("other", "admin"), "t1"),
      false
    );
  });

  it("returns false on malformed input", () => {
    const malformed = [
      null,
      {},
      { members: null },
      { members: [] },
      { members: [{}] },
      { members: [{ team_id: "t1" }] },
    ];
    for (const user of malformed) {
      assert.equal(requireOwnerOrAdmin(user, "t1"), false);
    }
  });
});
true +`; + +const ordersV2 = `cubes: + - name: orders + sql_table: public.orders + measures: + - name: count + type: count + - name: revenue + type: sum + sql: amount + dimensions: + - name: id + type: number + sql: id + primary_key: true +`; + +const customers = `cubes: + - name: customers + sql_table: public.customers + measures: + - name: count + type: count + dimensions: + - name: id + type: number + sql: id + primary_key: true +`; + +describe("diffVersions", () => { + it("returns all empty arrays when both versions are identical", () => { + const out = diffVersions({ + fromDataschemas: [mkRow("orders.yml", ordersV1)], + toDataschemas: [mkRow("orders.yml", ordersV1)], + }); + assert.deepEqual(out.addedCubes, []); + assert.deepEqual(out.removedCubes, []); + assert.deepEqual(out.modifiedCubes, []); + }); + + it("skips byte-identical files even when code strings differ by checksum match", () => { + const out = diffVersions({ + fromDataschemas: [mkRow("orders.yml", ordersV1, "same")], + toDataschemas: [mkRow("orders.yml", ordersV1, "same")], + }); + assert.deepEqual(out.modifiedCubes, []); + }); + + it("detects an added cube (new file in toDataschemas)", () => { + const out = diffVersions({ + fromDataschemas: [mkRow("orders.yml", ordersV1)], + toDataschemas: [ + mkRow("orders.yml", ordersV1), + mkRow("customers.yml", customers), + ], + }); + assert.equal(out.addedCubes.length, 1); + assert.equal(out.addedCubes[0].cubeName, "customers"); + assert.equal(out.addedCubes[0].file, "customers.yml"); + assert.deepEqual(out.removedCubes, []); + assert.deepEqual(out.modifiedCubes, []); + }); + + it("detects a removed cube (file missing from toDataschemas)", () => { + const out = diffVersions({ + fromDataschemas: [ + mkRow("orders.yml", ordersV1), + mkRow("customers.yml", customers), + ], + toDataschemas: [mkRow("orders.yml", ordersV1)], + }); + assert.deepEqual(out.addedCubes, []); + assert.equal(out.removedCubes.length, 1); + assert.equal(out.removedCubes[0].cubeName, 
const INSERT_AUDIT_LOG = `
  mutation InsertAuditLog(
    $action: String!
    $user_id: uuid!
    $datasource_id: uuid
    $branch_id: uuid
    $target_id: uuid!
    $outcome: String!
    $error_code: String
    $payload: jsonb
  ) {
    insert_audit_logs_one(object: {
      action: $action,
      user_id: $user_id,
      datasource_id: $datasource_id,
      branch_id: $branch_id,
      target_id: $target_id,
      outcome: $outcome,
      error_code: $error_code,
      payload: $payload
    }) {
      id
    }
  }
`;

const MAX_ATTEMPTS = 3;
const INITIAL_BACKOFF_MS = 50;

// Promise-based delay used between retry attempts.
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * writeAuditLog — durable audit writer for persistent mutating operations
 * (FR-016). Handlers call this directly on every failure branch; success
 * inserts are captured by Hasura event triggers.
 *
 * Best-effort with 3 attempts and exponential backoff. If every attempt
 * fails, a structured stderr line is emitted as a last-resort observation.
 * The caller is not informed of the write failure — audit bookkeeping must
 * never block a user response.
 *
 * The admin-secret path is used (no authToken passed to fetchGraphQL) so
 * permission policies on `audit_logs` (admin-only) do not reject the write.
 *
 * @param {object} args
 * @param {'dataschema_delete'|'version_rollback'} args.action
 * @param {string} args.userId
 * @param {string} args.targetId
 * @param {string} [args.datasourceId]
 * @param {string} [args.branchId]
 * @param {'success'|'failure'} args.outcome
 * @param {string} [args.errorCode] - required when outcome==='failure'
 * @param {object} [args.payload] - operation-specific detail (jsonb)
 * @returns {Promise<{ok:true, id:string} | {ok:false}>}
 */
export async function writeAuditLog({
  action,
  userId,
  datasourceId = null,
  branchId = null,
  targetId,
  outcome,
  errorCode = null,
  payload = null,
} = {}) {
  // Reject incomplete calls up front; the stderr line is the only signal.
  if (!action || !userId || !targetId || !outcome) {
    console.error(
      JSON.stringify({
        level: "error",
        event: "audit_write_failed",
        reason: "missing_required_fields",
        action,
        userId,
        targetId,
        outcome,
        ts: new Date().toISOString(),
      })
    );
    return { ok: false };
  }

  const variables = {
    action,
    user_id: userId,
    datasource_id: datasourceId,
    branch_id: branchId,
    target_id: targetId,
    outcome,
    error_code: errorCode,
    payload,
  };

  // Retry loop: 3 attempts with 50ms/100ms backoff between them.
  let failure;
  let attempt = 0;
  while (attempt < MAX_ATTEMPTS) {
    attempt += 1;
    try {
      const response = await fetchGraphQL(INSERT_AUDIT_LOG, variables);
      const insertedId = response?.data?.insert_audit_logs_one?.id;
      if (insertedId) {
        return { ok: true, id: insertedId };
      }
      failure = new Error("insert_audit_logs_one returned no id");
    } catch (err) {
      failure = err;
    }
    if (attempt < MAX_ATTEMPTS) {
      await sleep(INITIAL_BACKOFF_MS * 2 ** (attempt - 1));
    }
  }

  // All attempts exhausted — emit the last-resort structured observation.
  console.error(
    JSON.stringify({
      level: "error",
      event: "audit_write_failed",
      reason: failure?.message || "unknown",
      action,
      userId,
      targetId,
      outcome,
      errorCode,
      ts: new Date().toISOString(),
    })
  );
  return { ok: false };
}
Legacy sql_credentials lookup: username/password from the sql_credentials table * - * @param {null} _ - Unused parameter. - * @param {Object} user - The user object with username and password. - * @returns {Promise} - Resolves to { password, securityContext } + * @param {Object} request - Cube.js SQL request metadata (protocol, method, apiType) + * @param {string|Object} userArg - Username string (v1.6+) or legacy { username, password } object + * @param {string} [passwordArg] - Password string (v1.6+); absent in legacy object-shape calls + * @returns {Promise<{ password: string, securityContext: Object }>} */ -const checkSqlAuth = async (_, user) => { - const password = typeof user === "string" ? user : user?.password; - const username = typeof user === "string" ? _ : user?.username; - - // Detect if password looks like a JWT (WorkOS RS256) +const checkSqlAuth = async (request, userArg, passwordArg) => { + // Resolve the two shapes Cube has used for this callback: + // new: (request, username: string, password: string) + // legacy: (_req, { username, password }) + const username = + typeof userArg === "string" ? userArg : userArg?.username; + const password = + passwordArg ?? + (typeof userArg === "string" ? undefined : userArg?.password); + + // Detect if password looks like a JWT (WorkOS RS256 / FraiOS HS256) if (password && password.includes(".") && password.split(".").length === 3) { const tokenType = detectTokenType(password); @@ -134,8 +165,12 @@ const checkSqlAuth = async (_, user) => { } } - // Legacy sql_credentials path (unchanged) - const sqlCredentials = await findSqlCredentials(username || user); + // Legacy sql_credentials path — lookup by the plaintext username. + // Cube.js compares the supplied password against `password` in the return value. 
+ if (!username || typeof username !== "string") { + throw new Error("Incorrect user name or password"); + } + const sqlCredentials = await findSqlCredentials(username); return { password: sqlCredentials?.password, diff --git a/services/cubejs/src/utils/compilerCacheInvalidator.js b/services/cubejs/src/utils/compilerCacheInvalidator.js new file mode 100644 index 00000000..8336f255 --- /dev/null +++ b/services/cubejs/src/utils/compilerCacheInvalidator.js @@ -0,0 +1,33 @@ +/** + * Invalidate compiler-cache entries scoped to a single branch's schemaVersion. + * + * The cubejs compiler cache is an LRU keyed by `appId`, which is built in + * `services/cubejs/index.js` as: + * `CUBEJS_APP_${dataSourceVersion}_${schemaVersion}}` (trailing `}` is + * intentional — it comes from the original template literal). + * + * Two users querying the same branch share one `schemaVersion` but may differ + * in `dataSourceVersion` (which folds team-properties hash). Eviction therefore + * iterates keys and removes every key whose suffix matches `_${schemaVersion}}`. + * + * FR-004 blast radius: compiler cache only, scoped to the target branch. + * Pre-aggregation cache and user-scope caches are never touched. 
+ * + * @param {{compilerCache?: {keys?: () => Iterable, delete: (key:unknown)=>unknown}}} cubejs + * @param {string} schemaVersion - branch's current schemaVersion hash + * @returns {number} count of LRU entries removed + */ +export function invalidateCompilerForBranch(cubejs, schemaVersion) { + const cache = cubejs?.compilerCache; + if (!cache || typeof cache.keys !== "function") return 0; + + const suffix = `_${schemaVersion}}`; + let evicted = 0; + for (const key of cache.keys()) { + if (typeof key === "string" && key.endsWith(suffix)) { + cache.delete(key); + evicted += 1; + } + } + return evicted; +} diff --git a/services/cubejs/src/utils/dataSourceHelpers.js b/services/cubejs/src/utils/dataSourceHelpers.js index 079c00fb..45a70137 100644 --- a/services/cubejs/src/utils/dataSourceHelpers.js +++ b/services/cubejs/src/utils/dataSourceHelpers.js @@ -1,3 +1,5 @@ +import { createHash } from "crypto"; + import { fetchGraphQL } from "./graphql.js"; import { fetchWorkOSUserProfile } from "./workosAuth.js"; @@ -92,6 +94,11 @@ const membersFragment = ` members { id team_id + properties + team { + id + settings + } member_roles { id team_role @@ -205,6 +212,35 @@ const dataschemasQuery = ` } `; +const versionDataschemasQuery = ` + query VersionDataschemas($versionId: uuid!) { + dataschemas(where: {version_id: {_eq: $versionId}}) { + id + name + code + checksum + } + } +`; + +const versionBranchQuery = ` + query VersionBranch($versionId: uuid!) { + versions_by_pk(id: $versionId) { + id + branch_id + branch { + id + status + datasource_id + datasource { + id + team_id + } + } + } + } +`; + export const findUser = async ({ userId }) => { const cached = getUserCacheEntry(userId); if (cached) return cached; @@ -276,6 +312,108 @@ export const findDataSchemasByIds = async ({ ids }) => { return dataSchemas; }; +/** + * Load every dataschema attached to a single version. + * Returns `[{id, name, code, checksum}]`. 
+ */ +export const findVersionDataschemas = async ({ versionId }) => { + const res = await fetchGraphQL(versionDataschemasQuery, { versionId }); + return res?.data?.dataschemas || []; +}; + +/** + * Resolve the branch + datasource ownership of a version in one query. + * Returns `{versionId, branchId, branchStatus, datasourceId, teamId}` + * or `null` if the version does not exist / the caller cannot see it. + */ +export const findVersionBranch = async ({ versionId }) => { + const res = await fetchGraphQL(versionBranchQuery, { versionId }); + const row = res?.data?.versions_by_pk; + if (!row) return null; + return { + versionId: row.id, + branchId: row.branch_id, + branchStatus: row.branch?.status, + datasourceId: row.branch?.datasource_id || row.branch?.datasource?.id, + teamId: row.branch?.datasource?.team_id, + }; +}; + +/** + * Insert a new version on `branchId` whose dataschemas are byte-identical + * clones of `toVersionId`'s dataschemas. FR-013 / FR-013a: + * - only dataschemas are cloned (no explorations/alerts), + * - the new row uses `origin: 'rollback'`, + * - the caller's minted Hasura token is used so owner/admin permission + * policies are enforced at the database layer. + * + * Returns `{newVersionId, clonedDataschemaCount}` on success, + * `{errors}` on any Hasura permission/constraint failure so the caller can + * map the extensions.code via mapHasuraErrorCode(). + */ +export const rollbackVersion = async ({ + branchId, + toVersionId, + userId, + datasourceId, + authToken, +}) => { + const originals = await findVersionDataschemas({ versionId: toVersionId }); + // The `set_public_dataschemas_checksum` BEFORE-INSERT trigger computes + // dataschema-level checksums; we do NOT set one here or Hasura's insert + // permission rejects the extra column.
+ const clonedData = originals.map((row) => ({ + name: row.name, + code: row.code, + user_id: userId, + datasource_id: datasourceId, + })); + + // Version-level checksum: md5 over the concatenated dataschema codes in + // a stable order. Matches the `version.checksum` NOT NULL constraint. + const versionChecksum = md5OfCode( + originals + .map((r) => r.name) + .sort() + .map((n) => `${n}:${originals.find((x) => x.name === n)?.code || ""}`) + .join("\n") + ); + + const object = { + branch_id: branchId, + user_id: userId, + origin: "rollback", + checksum: versionChecksum, + dataschemas: { data: clonedData }, + }; + + const res = await fetchGraphQL( + upsertVersionMutation, + { object }, + authToken, + { preserveErrors: true } + ); + + if (res?.errors) { + return { errors: res.errors }; + } + + const newVersionId = res?.data?.insert_versions_one?.id; + if (!newVersionId) { + return { + errors: [ + { message: "insert_versions_one returned no id", extensions: {} }, + ], + }; + } + + return { newVersionId, clonedDataschemaCount: clonedData.length }; +}; + +function md5OfCode(code) { + return createHash("md5").update(String(code)).digest("hex"); +} + // --- Identity resolution functions (T007) --- const findAccountByWorkosIdQuery = ` diff --git a/services/cubejs/src/utils/directVerifyAuth.js b/services/cubejs/src/utils/directVerifyAuth.js new file mode 100644 index 00000000..3b7be484 --- /dev/null +++ b/services/cubejs/src/utils/directVerifyAuth.js @@ -0,0 +1,112 @@ +import jwt from "jsonwebtoken"; + +import { + detectTokenType, + verifyWorkOSToken, + verifyFraiOSToken, +} from "./workosAuth.js"; +import { + provisionUserFromWorkOS, + provisionUserFromFraiOS, +} from "./dataSourceHelpers.js"; + +const { JWT_KEY, JWT_ALGORITHM } = process.env; + +/** + * Shared direct-verify entry point for branch-scoped Model-Management routes. 
+ * + * Endpoints whose request contract does NOT carry `x-hasura-datasource-id` + * cannot mount behind the existing `checkAuthMiddleware` (which 400s when + * the header is absent — see utils/checkAuth.js). Instead those handlers + * call this helper, which mirrors the inline auth path already used by + * `routes/metaAll.js` and the `hasuraProxy` authMiddleware. + * + * Returns `{token, payload, tokenType, userId}` on success, and + * `{error: {status, code, message}}` on any auth failure — never throws. + * + * @param {import('express').Request} req + * @returns {Promise<{token:string, payload:object, tokenType:string, userId:string} | {error:{status:number, code:string, message:string}}>} + */ +export async function verifyAndProvision(req) { + const authHeader = req.headers?.authorization; + if (!authHeader) { + return { + error: { + status: 403, + code: "auth_missing", + message: "Authorization header required", + }, + }; + } + + const token = authHeader.startsWith("Bearer ") + ? authHeader.slice(7) + : authHeader; + + if (!token || token.split(".").length !== 3) { + return { + error: { + status: 403, + code: "auth_invalid", + message: "Bearer token required", + }, + }; + } + + let tokenType; + try { + tokenType = detectTokenType(token); + } catch { + return { + error: { + status: 403, + code: "auth_invalid", + message: "Token could not be decoded", + }, + }; + } + + try { + if (tokenType === "workos") { + const payload = await verifyWorkOSToken(token); + const userId = await provisionUserFromWorkOS(payload); + return { token, payload, tokenType, userId }; + } + if (tokenType === "fraios") { + const payload = await verifyFraiOSToken(token); + const userId = await provisionUserFromFraiOS(payload); + return { token, payload, tokenType, userId }; + } + // Hasura HS256 — accepted for parity with checkAuth.js so the Model + // Management routes honour the same tokens already used by catalog + + // discovery endpoints (FR-015). 
No provisioning needed; the token is + // already scoped to an existing user. + const payload = jwt.verify(token, JWT_KEY, { + algorithms: [JWT_ALGORITHM || "HS256"], + }); + const userId = + payload?.hasura?.["x-hasura-user-id"] || payload?.sub; + if (!userId) { + return { + error: { + status: 403, + code: "auth_invalid", + message: "Token missing x-hasura-user-id", + }, + }; + } + return { token, payload, tokenType: "hasura", userId }; + } catch (err) { + const status = err?.status || 403; + return { + error: { + status, + code: status === 503 ? "auth_unavailable" : "auth_invalid", + message: + status === 503 + ? "Authentication service unavailable" + : err?.message || "Token verification failed", + }, + }; + } +} diff --git a/services/cubejs/src/utils/errorCodes.js b/services/cubejs/src/utils/errorCodes.js new file mode 100644 index 00000000..f8637ee2 --- /dev/null +++ b/services/cubejs/src/utils/errorCodes.js @@ -0,0 +1,31 @@ +/** + * Canonical Model-Management API error codes (FR-017). + * + * Every code emitted by a Model-Management handler MUST come from this enum. + * Every `contracts/011-model-mgmt-api/*.yaml` ErrorCode enum MUST match this + * list exactly — drift is caught at build time by scripts/lint-error-codes.mjs. 
+ */ + +export const ErrorCode = Object.freeze({ + VALIDATE_INVALID_MODE: "validate_invalid_mode", + VALIDATE_TARGET_NOT_FOUND: "validate_target_not_found", + VALIDATE_UNRESOLVED_REFERENCE: "validate_unresolved_reference", + REFRESH_BRANCH_NOT_VISIBLE: "refresh_branch_not_visible", + REFRESH_UNAUTHORIZED: "refresh_unauthorized", + DELETE_BLOCKED_BY_REFERENCES: "delete_blocked_by_references", + DELETE_BLOCKED_HISTORICAL_VERSION: "delete_blocked_historical_version", + DELETE_BLOCKED_AUTHORIZATION: "delete_blocked_authorization", + CUBE_NOT_FOUND: "cube_not_found", + DIFF_CROSS_BRANCH: "diff_cross_branch", + DIFF_INVALID_REQUEST: "diff_invalid_request", + ROLLBACK_VERSION_NOT_ON_BRANCH: "rollback_version_not_on_branch", + ROLLBACK_BLOCKED_AUTHORIZATION: "rollback_blocked_authorization", + ROLLBACK_INVALID_REQUEST: "rollback_invalid_request", + ROLLBACK_SOURCE_COLUMNS_MISSING: "rollback_source_columns_missing", +}); + +export const ErrorCodeSet = Object.freeze(new Set(Object.values(ErrorCode))); + +export function isKnownErrorCode(code) { + return ErrorCodeSet.has(code); +} diff --git a/services/cubejs/src/utils/graphql.js b/services/cubejs/src/utils/graphql.js index b80c5046..896af7c1 100644 --- a/services/cubejs/src/utils/graphql.js +++ b/services/cubejs/src/utils/graphql.js @@ -3,7 +3,27 @@ import fetch from "node-fetch"; const HASURA_ENDPOINT = process.env.HASURA_ENDPOINT; const HASURA_GRAPHQL_ADMIN_SECRET = process.env.HASURA_GRAPHQL_ADMIN_SECRET; -export const fetchGraphQL = async (query, variables, authToken) => { +/** + * Call Hasura GraphQL. + * + * Legacy behaviour (default): on `res.errors`, throw with `status = 503`. + * + * Opt-in (`{preserveErrors: true}`): return `{data, errors, status}` without + * throwing, so callers that need FR-017-compliant error mapping can read + * Hasura's original `extensions.code` values (e.g. `permission-error`, + * `not-exists`) and map them to stable Model-Management error codes. 
+ * + * @param {string} query + * @param {object} [variables] + * @param {string} [authToken] + * @param {{preserveErrors?: boolean}} [options] + */ +export const fetchGraphQL = async ( + query, + variables, + authToken, + options = {} +) => { const headers = { "x-hasura-admin-secret": HASURA_GRAPHQL_ADMIN_SECRET, }; @@ -25,6 +45,13 @@ export const fetchGraphQL = async (query, variables, authToken) => { const res = await result.json(); if (res.errors) { + if (options?.preserveErrors) { + return { + data: res.data ?? null, + errors: res.errors, + status: result.status, + }; + } const error = new Error(JSON.stringify(res.errors)); error.status = 503; throw error; diff --git a/services/cubejs/src/utils/mapHasuraErrorCode.js b/services/cubejs/src/utils/mapHasuraErrorCode.js new file mode 100644 index 00000000..03b55d5c --- /dev/null +++ b/services/cubejs/src/utils/mapHasuraErrorCode.js @@ -0,0 +1,42 @@ +import { ErrorCode } from "./errorCodes.js"; + +/** + * Map the first entry of a Hasura `errors[]` array onto a stable + * Model-Management error code. + * + * Hasura emits `extensions.code` values like `permission-error`, `not-exists`, + * `constraint-violation`, `validation-failed`, `invalid-jwt`, etc. The map + * below covers every code actually reached by routes in this feature. Any + * code outside the map returns `null`, which callers treat as + * "propagate as 503 hasura_unavailable" per R11. 
+ * + * @param {Array<{extensions?:{code?:string}, message?:string}>|null|undefined} errors + * @param {{action?: 'delete'|'rollback'|'validate'|'meta'|'diff'|'refresh'}} [ctx] + * @returns {string|null} + */ +export function mapHasuraErrorCode(errors, ctx = {}) { + if (!Array.isArray(errors) || errors.length === 0) return null; + const first = errors[0]; + const code = first?.extensions?.code; + if (!code) return null; + + const action = ctx.action; + + if (code === "permission-error" || code === "access-denied") { + if (action === "delete") return ErrorCode.DELETE_BLOCKED_AUTHORIZATION; + if (action === "rollback") return ErrorCode.ROLLBACK_BLOCKED_AUTHORIZATION; + return null; + } + if (code === "not-exists" || code === "not-found") { + if (action === "meta") return ErrorCode.CUBE_NOT_FOUND; + if (action === "validate") return ErrorCode.VALIDATE_TARGET_NOT_FOUND; + if (action === "rollback") return ErrorCode.ROLLBACK_VERSION_NOT_ON_BRANCH; + return null; + } + if (code === "constraint-violation" || code === "constraint-error") { + if (action === "delete") return ErrorCode.DELETE_BLOCKED_BY_REFERENCES; + return null; + } + + return null; +} diff --git a/services/cubejs/src/utils/metaForBranch.js b/services/cubejs/src/utils/metaForBranch.js new file mode 100644 index 00000000..2c7857ad --- /dev/null +++ b/services/cubejs/src/utils/metaForBranch.js @@ -0,0 +1,91 @@ +import defineUserScope from "./defineUserScope.js"; + +/** + * compileMetaForBranch — load + compile the dataschemas of a chosen + * (branch, version) pair and return the raw visibility-filtered metaConfig. 
+ * + * This helper centralises the compilation path shared by: + * - /api/v1/meta-all (aggregate catalog, which then summarizes) + * - /api/v1/meta/cube/:cubeName (single-cube, which returns the raw envelope) + * + * The existing `metaForDatasource` inside routes/metaAll.js summarized each + * cube down to `{measures:string[], dimensions:string[], segments:string[]}`, + * and always picked `ds.branches.find(b => b.status === 'active')`. Neither + * behaviour is appropriate for the single-cube route, which: + * - needs full member envelopes (so the agent can inspect sql/type/meta without + * a second round-trip), and + * - must honour an explicit `x-hasura-branch-id` header. + * + * Returns `{ branchId, versionId, metaConfig }` where `metaConfig` is the + * array Cube.js emits from `compilerApi.metaConfig()` after + * `apiGateway.filterVisibleItemsInMeta(context, …)`. + * + * @param {object} args + * @param {object} args.apiGateway - `cubejs.apiGateway()` instance + * @param {import('express').Request} args.req + * @param {string} args.userId + * @param {string} args.authToken + * @param {object} args.dataSource - a single dataSource row (includes `.branches`) + * @param {string} [args.branchId] - optional; defaults to the datasource's active branch + * @param {string} [args.versionId] - optional; defaults to the branch's latest version + * @param {Array} args.allMembers - `user.members`, forwarded to defineUserScope + * @param {string} [args.requestId] + * @returns {Promise<{branchId:string, versionId:string|null, metaConfig:Array}>} + */ +export async function compileMetaForBranch({ + apiGateway, + req, + userId, + authToken, + dataSource, + branchId, + versionId, + allMembers, + requestId, +}) { + const branches = dataSource?.branches || []; + const branch = branchId + ? 
branches.find((b) => b.id === branchId) + : branches.find((b) => b.status === "active") || branches[0]; + + if (!branch) { + const err = new Error("branch_not_found"); + err.status = 404; + throw err; + } + + const versions = branch.versions || []; + const version = versionId + ? versions.find((v) => v.id === versionId) + : versions[0] || null; + + const userScope = defineUserScope( + [dataSource], + allMembers, + dataSource.id, + branch.id, + version?.id + ); + + const securityContext = { authToken, userId, userScope }; + const context = await apiGateway.contextByReq( + req, + securityContext, + requestId || + req?.get?.("x-request-id") || + req?.get?.("traceparent") || + `metaForBranch-${Date.now()}` + ); + + const compilerApi = await apiGateway.getCompilerApi(context); + let metaConfig = await compilerApi.metaConfig(context, { + requestId: context.requestId, + }); + metaConfig = apiGateway.filterVisibleItemsInMeta(context, metaConfig) || []; + + return { + branchId: branch.id, + versionId: version?.id || null, + metaConfig, + }; +} diff --git a/services/cubejs/src/utils/referenceScanner.js b/services/cubejs/src/utils/referenceScanner.js new file mode 100644 index 00000000..904190ac --- /dev/null +++ b/services/cubejs/src/utils/referenceScanner.js @@ -0,0 +1,114 @@ +/** + * scanCrossCubeReferences — detect blocking references to a target cube. + * + * Implements FR-008: deletion of a cube MUST be blocked when any other cube on + * the same branch carries a reference to the target via one of seven kinds: + * `joins`, `extends`, `sub_query`, `formula`, `segment`, `pre_aggregation`, + * or `filter_params`. + * + * Inputs: + * targetCubeName — cube identifier (e.g. `"orders"`). + * otherCubes — array of `{cubeName, fileName, code}` for every OTHER + * cube on the same branch. The caller MUST omit the target + * cube itself; self-matches are still filtered defensively. + * + * Returns: array of `{referringCube, file, referenceKind, line}` in the order + * encountered. 
The `referenceKind` values match the enum on + * `BlockingReference.referenceKind` in data-model §2.4. + * + * Implementation is textual-pattern based per research.md §R3. A cube is a + * blocking referrer if any of the seven patterns match its source. Textual + * scan is deterministic, O(n·m), and avoids duplicating compiler parse state. + * + * @param {string} targetCubeName + * @param {Array<{cubeName:string, fileName:string, code:string}>} otherCubes + * @returns {Array<{referringCube:string, file:string, referenceKind:string, line:number}>} + */ +export function scanCrossCubeReferences(targetCubeName, otherCubes) { + if (!targetCubeName || !Array.isArray(otherCubes)) return []; + + const target = escapeForRegex(targetCubeName); + + const patterns = [ + { + kind: "filter_params", + re: new RegExp(`FILTER_PARAMS\\.${target}\\.`, "g"), + }, + { + kind: "extends", + re: new RegExp(`extends\\s*:\\s*['\"]?${target}['\"]?\\b`, "g"), + }, + { + kind: "joins", + re: new RegExp( + `joins\\s*:[\\s\\S]*?-\\s*name\\s*:\\s*['\"]?${target}['\"]?\\b`, + "g" + ), + }, + { + kind: "pre_aggregation", + re: new RegExp( + `pre_aggregations\\s*:[\\s\\S]*?${target}\\.[A-Za-z_][A-Za-z0-9_]*`, + "g" + ), + }, + { + kind: "sub_query", + re: new RegExp( + `sub_query\\s*:\\s*true[\\s\\S]*?(?:\\{${target}(?:\\.[A-Za-z_][A-Za-z0-9_]*)?\\}|\\b${target}\\.[A-Za-z_][A-Za-z0-9_]*)`, + "g" + ), + }, + { + kind: "segment", + re: new RegExp( + `segments\\s*:[\\s\\S]*?(?:\\{${target}(?:\\.[A-Za-z_][A-Za-z0-9_]*)?\\}|\\b${target}\\.[A-Za-z_][A-Za-z0-9_]*)`, + "g" + ), + }, + { + kind: "formula", + re: new RegExp( + `(?:\\{${target}(?:\\.[A-Za-z_][A-Za-z0-9_]*)?\\}|\\b${target}\\.[A-Za-z_][A-Za-z0-9_]*)`, + "g" + ), + }, + ]; + + const hits = []; + for (const cube of otherCubes) { + if (!cube || cube.cubeName === targetCubeName || !cube.code) continue; + const code = String(cube.code); + const seen = new Set(); + for (const { kind, re } of patterns) { + re.lastIndex = 0; + let m; + while ((m = 
re.exec(code))) { + const key = `${kind}:${m.index}`; + if (seen.has(key)) break; + seen.add(key); + hits.push({ + referringCube: cube.cubeName, + file: cube.fileName, + referenceKind: kind, + line: lineAt(code, m.index), + }); + if (!re.global) break; + } + } + } + return hits; +} + +function escapeForRegex(s) { + return String(s).replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +function lineAt(text, index) { + if (index < 0) return 1; + let line = 1; + for (let i = 0; i < index && i < text.length; i += 1) { + if (text.charCodeAt(i) === 10) line += 1; + } + return line; +} diff --git a/services/cubejs/src/utils/requireOwnerOrAdmin.js b/services/cubejs/src/utils/requireOwnerOrAdmin.js new file mode 100644 index 00000000..6a76b063 --- /dev/null +++ b/services/cubejs/src/utils/requireOwnerOrAdmin.js @@ -0,0 +1,24 @@ +/** + * requireOwnerOrAdmin — resolve whether the caller has owner or admin role on + * the given team. Shared by every Model-Management handler gating on + * owner/admin (delete, rollback, refresh, validate-in-branch replace/preview-delete). + * + * A member carries one or more `member_roles` rows, each with a + * `team_role` value in (`owner`, `admin`, `member`, ...). Team membership is + * resolved from `user.members`, as returned by `findUser()`. 
+ * + * @param {{members?: Array<{team_id:string, member_roles?: Array<{team_role:string}>}>}} user + * @param {string} teamId + * @returns {boolean} + */ +export function requireOwnerOrAdmin(user, teamId) { + if (!user || !teamId || !Array.isArray(user.members)) return false; + for (const m of user.members) { + if (m?.team_id !== teamId) continue; + const roles = m.member_roles || []; + for (const r of roles) { + if (r?.team_role === "owner" || r?.team_role === "admin") return true; + } + } + return false; +} diff --git a/services/cubejs/src/utils/versionDiff.js b/services/cubejs/src/utils/versionDiff.js new file mode 100644 index 00000000..888ac271 --- /dev/null +++ b/services/cubejs/src/utils/versionDiff.js @@ -0,0 +1,155 @@ +import YAML from "yaml"; + +import { + parseCubesFromJs, + diffModels, +} from "./smart-generation/diffModels.js"; + +function parseCubes(name, code) { + if (!code) return []; + const isYaml = name?.endsWith(".yml") || name?.endsWith(".yaml"); + try { + if (isYaml) { + const parsed = YAML.parse(code); + return Array.isArray(parsed?.cubes) ? parsed.cubes : []; + } + const cubes = parseCubesFromJs(code); + return Array.isArray(cubes) ? cubes : []; + } catch { + return []; + } +} + +/** + * Group a flat `diffModels` result into per-cube `CubeChange` records. + * + * `diffModels` returns `{fields_added, fields_updated, fields_removed}` arrays + * where every entry carries the `cube` attribute identifying which cube it + * belongs to. This helper re-indexes those flat arrays into the per-cube + * shape required by contracts/version-diff.yaml (`CubeChange.changes[]`). 
+ * + * @param {string} fileName + * @param {string} fromCode + * @param {string} toCode + * @returns {Array<{cubeName:string, file:string, changes:Array}>} + */ +function diffFilePair(fileName, fromCode, toCode) { + const flat = diffModels(fromCode || "", toCode || "", "replace"); + const byCube = new Map(); + + const ensure = (cubeName) => { + if (!byCube.has(cubeName)) { + byCube.set(cubeName, new Map()); + } + return byCube.get(cubeName); + }; + + const bucket = (cubeName, memberType) => { + const cube = ensure(cubeName); + if (!cube.has(memberType)) { + cube.set(memberType, { added: [], removed: [], modified: [] }); + } + return cube.get(memberType); + }; + + for (const entry of flat.fields_added || []) { + if (!entry?.cube) continue; + const b = bucket(entry.cube, entry.member_type || "meta"); + b.added.push(entry.name); + } + for (const entry of flat.fields_removed || []) { + if (!entry?.cube) continue; + const b = bucket(entry.cube, entry.member_type || "meta"); + b.removed.push(entry.name); + } + for (const entry of flat.fields_updated || []) { + if (!entry?.cube) continue; + const b = bucket(entry.cube, entry.member_type || "meta"); + b.modified.push(entry.name); + } + + const cubes = []; + for (const [cubeName, members] of byCube) { + const changes = []; + for (const [memberType, diff] of members) { + const hasAny = + diff.added.length || diff.removed.length || diff.modified.length; + if (!hasAny) continue; + changes.push({ + field: memberType === "measure" + ? "measures" + : memberType === "dimension" + ? "dimensions" + : memberType === "segment" + ? "segments" + : "meta", + added: diff.added, + removed: diff.removed, + modified: diff.modified, + }); + } + if (changes.length > 0) { + cubes.push({ cubeName, file: fileName, changes }); + } + } + return cubes; +} + +/** + * Diff two versions (identified by their dataschema arrays) into the + * `{addedCubes, removedCubes, modifiedCubes}` shape demanded by FR-011 + * and contracts/version-diff.yaml. 
+ * + * Matching is by dataschema `name` (the file name) — a cube is "added" when + * its file is absent from `fromDataschemas` and "removed" when its file is + * absent from `toDataschemas`. Byte-identical files are skipped. + * + * @param {object} args + * @param {Array<{id?:string, name:string, code:string, checksum?:string}>} args.fromDataschemas + * @param {Array<{id?:string, name:string, code:string, checksum?:string}>} args.toDataschemas + */ +export function diffVersions({ fromDataschemas, toDataschemas }) { + const fromByFile = new Map(); + for (const row of fromDataschemas || []) { + if (row?.name) fromByFile.set(row.name, row); + } + const toByFile = new Map(); + for (const row of toDataschemas || []) { + if (row?.name) toByFile.set(row.name, row); + } + + const addedCubes = []; + const removedCubes = []; + const modifiedCubes = []; + + for (const [file, toRow] of toByFile) { + if (!fromByFile.has(file)) { + for (const cube of parseCubes(file, toRow.code)) { + addedCubes.push({ cubeName: cube.name, file }); + } + continue; + } + const fromRow = fromByFile.get(file); + if ( + fromRow.checksum && + toRow.checksum && + fromRow.checksum === toRow.checksum + ) { + continue; + } + if (fromRow.code === toRow.code) continue; + + const perCube = diffFilePair(file, fromRow.code, toRow.code); + for (const cube of perCube) modifiedCubes.push(cube); + } + + for (const [file, fromRow] of fromByFile) { + if (!toByFile.has(file)) { + for (const cube of parseCubes(file, fromRow.code)) { + removedCubes.push({ cubeName: cube.name, file }); + } + } + } + + return { addedCubes, removedCubes, modifiedCubes }; +} diff --git a/services/hasura/metadata/cron_triggers.yaml b/services/hasura/metadata/cron_triggers.yaml index 57b429c6..863d58bd 100644 --- a/services/hasura/metadata/cron_triggers.yaml +++ b/services/hasura/metadata/cron_triggers.yaml @@ -4,3 +4,9 @@ include_in_metadata: true payload: {} comment: "" +- name: audit_logs_retention_90d + webhook: 
'{{ACTIONS_URL}}/rpc/audit_logs_retention' + schedule: '0 4 * * *' + include_in_metadata: true + payload: {} + comment: "Delete audit_logs rows older than 90 days (FR-016 retention)." diff --git a/services/hasura/metadata/tables.yaml b/services/hasura/metadata/tables.yaml index 7906a943..e42c9808 100644 --- a/services/hasura/metadata/tables.yaml +++ b/services/hasura/metadata/tables.yaml @@ -600,6 +600,46 @@ - admin - user_id: _eq: X-Hasura-User-Id + delete_permissions: + - role: user + permission: + filter: + _and: + - datasource: + team: + members: + _and: + - member_roles: + team_role: + _in: + - owner + - admin + - user_id: + _eq: X-Hasura-User-Id + - version: + is_current: + _eq: true + - version: + branch: + status: + _eq: active + event_triggers: + - name: delete_dataschema_audit + definition: + enable_manual: false + delete: + columns: '*' + retry_conf: + interval_sec: 10 + num_retries: 3 + timeout_sec: 60 + webhook: '{{ACTIONS_URL}}/rpc/audit_dataschema_delete' + cleanup_config: + batch_size: 10000 + clean_invocation_logs: false + clear_older_than: 168 + paused: false + schedule: 0 0 * * * - table: name: datasources schema: public @@ -1631,6 +1671,7 @@ columns: - branch_id - checksum + - origin - user_id select_permissions: - role: user @@ -1640,7 +1681,9 @@ - checksum - created_at - id + - is_current - markdown_doc + - origin - updated_at - user_id filter: @@ -1673,4 +1716,33 @@ clear_older_than: 168 paused: true schedule: 0 0 * * * + - name: version_rollback_audit + definition: + enable_manual: false + insert: + columns: '*' + retry_conf: + interval_sec: 10 + num_retries: 3 + timeout_sec: 60 + webhook: '{{ACTIONS_URL}}/rpc/audit_version_rollback' + cleanup_config: + batch_size: 10000 + clean_invocation_logs: false + clear_older_than: 168 + paused: false + schedule: 0 0 * * * timeout: 60 +- table: + name: audit_logs + schema: public + object_relationships: + - name: user + using: + foreign_key_constraint_on: user_id + - name: datasource + using: + 
foreign_key_constraint_on: datasource_id + - name: branch + using: + foreign_key_constraint_on: branch_id diff --git a/services/hasura/migrations/1713600000000_dataschemas_delete_permission/README.md b/services/hasura/migrations/1713600000000_dataschemas_delete_permission/README.md new file mode 100644 index 00000000..bec2c767 --- /dev/null +++ b/services/hasura/migrations/1713600000000_dataschemas_delete_permission/README.md @@ -0,0 +1,76 @@ +# Migration `1713600000000_dataschemas_delete_permission` + +Feature: `011-model-mgmt-api`. + +## What it does + +SQL (in `up.sql`): +1. Adds `versions.origin TEXT NOT NULL DEFAULT 'user'` with a CHECK constraint on `('user','smart_gen','rollback')`. +2. Adds `versions.is_current BOOLEAN NOT NULL DEFAULT true`. +3. **Backfills** `versions.is_current = false` for every version that is not the newest of its branch. The backfill runs in 1 000-row batches inside a `DO` block so a large table does not hold a long row-level lock. +4. Installs the `versions_flip_is_current()` trigger function and the `versions_flip_is_current_trg` AFTER-INSERT trigger. The function takes a transaction-scoped advisory lock keyed on `hashtextextended(NEW.branch_id::text, 0)`, which serialises concurrent inserts **on the same branch** and eliminates the race where two inserts could both leave `is_current = true`. Inserts on different branches still proceed in parallel. +5. Creates `audit_logs` table + three indexes (`created_at DESC`, `user_id`, `action`). + +Hasura metadata (in `services/hasura/metadata/tables.yaml`): +- `dataschemas.delete_permissions` for role `user`: allowed only when the caller is owner/admin on the datasource's team AND the version is `is_current = true` AND the branch is `status = active`. +- `versions.event_triggers` adds `version_rollback_audit` → `{{ACTIONS_URL}}/rpc/audit_version_rollback`. +- `dataschemas.event_triggers` adds `delete_dataschema_audit` → `{{ACTIONS_URL}}/rpc/audit_dataschema_delete`. 
+- `audit_logs` table definition with admin-only permissions and FK object relationships. + +Cron (in `services/hasura/metadata/cron_triggers.yaml`): +- `audit_logs_retention_90d` fires daily, POSTs to `{{ACTIONS_URL}}/rpc/audit_logs_retention`. The RPC handler in `services/actions/src/rpc/auditLogsRetention.js` deletes rows older than 90 days. + +## Applying + +```bash +./cli.sh hasura cli "migrate apply" +./cli.sh hasura cli "metadata apply" +``` + +Metadata apply must run **after** the SQL migration — it references the new `versions.is_current` column in the delete permission filter. + +## Size / duration expectations + +The migration touches three rows per version row in the worst case (column add × 2 + backfill update). Rough guidance on a typical PG 14 instance with `versions` indexed by `(branch_id, created_at DESC)`: + +| `versions` row count | Backfill wall-clock (approx.) | +|---|---| +| 1 000 | < 1 s | +| 10 000 | a few seconds | +| 100 000 | seconds to low tens of seconds | +| 1 000 000+ | measure on staging first; consider a maintenance window | + +The backfill's batching bounds the worst-case lock held on individual rows, not the total wall-clock. On a busy writer workload, the total duration can still be longer than a single-batch estimate. + +## Rolling back + +```bash +./cli.sh hasura cli "migrate apply --down 1" +``` + +`down.sql` drops, in order: +- `audit_logs` indexes and table (data loss — audit records for the retention window are discarded); +- the `versions_flip_is_current_trg` trigger and its function; +- the `versions.is_current` and `versions.origin` columns. + +Metadata will diverge from the migrated state after a down; run `./cli.sh hasura cli "metadata apply"` once the baseline metadata (pre-feature) is checked out. + +## Pre-flight checklist + +- [ ] `SELECT count(*) FROM versions;` — if > 100 000, run the backfill in a maintenance window or accept a short write pause. +- [ ] Actions container is up and reachable on `ACTIONS_URL`. 
The event triggers start firing the instant metadata is applied; if Actions is unreachable, Hasura retries three times per the `retry_conf`, then silently logs the failure.
+- [ ] The `hasura-migrations` container image must be rebuilt to include this migration directory. Kustomize: bump the image tag in `data/synmetrix/overlays/<environment>/kustomization.yaml` (e.g. `staging` or `production`).
+- [ ] Client-v2 needs **no** changes — this migration is strictly additive. Existing GraphQL queries continue to work; `origin` and `is_current` are additional selectable columns, not required arguments on any existing mutation.
+
+## Post-flight checks
+
+```sql
+-- Every branch has exactly one current version.
+SELECT branch_id, count(*) FILTER (WHERE is_current) AS c, count(*) t
+FROM versions GROUP BY branch_id HAVING count(*) FILTER (WHERE is_current) <> 1;
+-- (expect 0 rows)
+
+-- Audit writer is reachable.
+SELECT count(*) FROM audit_logs WHERE created_at > now() - interval '1 hour';
+-- (non-zero after the first delete or rollback)
+```
diff --git a/services/hasura/migrations/1713600000000_dataschemas_delete_permission/down.sql b/services/hasura/migrations/1713600000000_dataschemas_delete_permission/down.sql
new file mode 100644
index 00000000..66de41c9
--- /dev/null
+++ b/services/hasura/migrations/1713600000000_dataschemas_delete_permission/down.sql
@@ -0,0 +1,11 @@
+-- Reverse of up.sql for feature 011-model-mgmt-api.
+DROP INDEX IF EXISTS public.audit_logs_action_idx; +DROP INDEX IF EXISTS public.audit_logs_user_id_idx; +DROP INDEX IF EXISTS public.audit_logs_created_at_idx; +DROP TABLE IF EXISTS public.audit_logs; + +DROP TRIGGER IF EXISTS versions_flip_is_current_trg ON public.versions; +DROP FUNCTION IF EXISTS public.versions_flip_is_current(); + +ALTER TABLE public.versions DROP COLUMN IF EXISTS is_current; +ALTER TABLE public.versions DROP COLUMN IF EXISTS origin; diff --git a/services/hasura/migrations/1713600000000_dataschemas_delete_permission/up.sql b/services/hasura/migrations/1713600000000_dataschemas_delete_permission/up.sql new file mode 100644 index 00000000..047b5d5a --- /dev/null +++ b/services/hasura/migrations/1713600000000_dataschemas_delete_permission/up.sql @@ -0,0 +1,111 @@ +-- Model Management API — feature 011-model-mgmt-api +-- See specs/011-model-mgmt-api/research.md §R4 and §R12. +-- +-- Safe on large `versions` tables: +-- - column adds are metadata-only (PG11+ ADD COLUMN with DEFAULT) +-- - `is_current` backfill runs in 1000-row batches to avoid long locks +-- - the flip trigger takes an advisory lock keyed on branch_id to +-- eliminate the concurrent-insert race + +-- 1. versions.origin — distinguish user/smart_gen/rollback commits. +ALTER TABLE public.versions + ADD COLUMN IF NOT EXISTS origin TEXT NOT NULL DEFAULT 'user' + CHECK (origin IN ('user', 'smart_gen', 'rollback')); + +-- 2. versions.is_current — exactly one current row per branch. +ALTER TABLE public.versions + ADD COLUMN IF NOT EXISTS is_current BOOLEAN NOT NULL DEFAULT true; + +-- 3. Backfill in batches. Every branch's newest row stays is_current=true; +-- older rows flip to false. Runs in a DO block so the UPDATE can commit +-- incrementally on large tables. IF NOT EXISTS above makes this rerun-safe. 
+DO $backfill$
+DECLARE
+  batch_size INTEGER := 1000;
+  updated_rows INTEGER;
+BEGIN
+  LOOP
+    WITH keepers AS (          -- the single newest row per branch; id DESC
+      SELECT DISTINCT ON (branch_id) id  -- breaks created_at ties, matching
+      FROM public.versions               -- the flip trigger's winner rule
+      ORDER BY branch_id, created_at DESC, id DESC
+    ), candidates AS (
+      SELECT v.id
+      FROM public.versions v
+      WHERE v.is_current = true
+        AND v.id NOT IN (SELECT id FROM keepers)
+      LIMIT batch_size
+    )
+    UPDATE public.versions
+    SET is_current = false
+    WHERE id IN (SELECT id FROM candidates);
+
+    GET DIAGNOSTICS updated_rows = ROW_COUNT;
+    EXIT WHEN updated_rows = 0;
+  END LOOP;
+END
+$backfill$;
+
+-- 4. Statement-level trigger that enforces the invariant correctly for both
+-- single-row and multi-row inserts. Uses a transition table (PG 10+) so one
+-- fire of the function sees every newly-inserted row and picks the single
+-- winner per affected branch. An advisory lock keyed on a stable hash of the
+-- affected branch ids serialises concurrent inserts across transactions; inserts
+-- touching disjoint branches proceed in parallel.
+CREATE OR REPLACE FUNCTION public.versions_flip_is_current()
+RETURNS TRIGGER AS $$
+DECLARE
+  lock_key BIGINT;
+BEGIN
+  -- One xact-scoped advisory lock PER affected branch, acquired in stable
+  -- (hash) order: overlapping branch sets serialise without deadlocking.
+  FOR lock_key IN
+    SELECT DISTINCT hashtextextended(branch_id::text, 0)
+    FROM new_versions
+    ORDER BY 1
+  LOOP
+    PERFORM pg_advisory_xact_lock(lock_key);
+  END LOOP;
+
+  WITH affected AS (
+    SELECT DISTINCT branch_id FROM new_versions
+  ),
+  winners AS (
+    SELECT DISTINCT ON (v.branch_id) v.id
+    FROM public.versions v
+    JOIN affected a USING (branch_id)
+    ORDER BY v.branch_id, v.created_at DESC, v.id DESC
+  )
+  UPDATE public.versions v
+  SET is_current = (v.id IN (SELECT id FROM winners))
+  FROM affected a
+  WHERE v.branch_id = a.branch_id;
+
+  RETURN NULL;
+END;
+$$ LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS versions_flip_is_current_trg ON public.versions;
+CREATE TRIGGER versions_flip_is_current_trg
+AFTER INSERT ON public.versions
+REFERENCING NEW TABLE AS new_versions
+FOR EACH STATEMENT
+EXECUTE FUNCTION public.versions_flip_is_current();
+
+-- 5. audit_logs — durable audit store for delete + rollback (FR-016).
+CREATE TABLE IF NOT EXISTS public.audit_logs (
+  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
+  action text NOT NULL CHECK (action IN ('dataschema_delete', 'version_rollback')),
+  user_id uuid NOT NULL REFERENCES public.users(id) ON DELETE CASCADE,
+  datasource_id uuid REFERENCES public.datasources(id) ON DELETE SET NULL,
+  branch_id uuid REFERENCES public.branches(id) ON DELETE SET NULL,
+  target_id uuid NOT NULL,
+  outcome text NOT NULL CHECK (outcome IN ('success', 'failure')),
+  error_code text,
+  payload jsonb,
+  created_at timestamptz NOT NULL DEFAULT now()
+);
+
+CREATE INDEX IF NOT EXISTS audit_logs_created_at_idx ON public.audit_logs (created_at DESC);
+CREATE INDEX IF NOT EXISTS audit_logs_user_id_idx ON public.audit_logs (user_id);
+CREATE INDEX IF NOT EXISTS audit_logs_action_idx ON public.audit_logs (action);
diff --git a/specs/011-model-mgmt-api/DEPLOYMENT.md b/specs/011-model-mgmt-api/DEPLOYMENT.md
new file mode 100644
index 00000000..51756099
--- /dev/null
+++ b/specs/011-model-mgmt-api/DEPLOYMENT.md
@@
-0,0 +1,134 @@ +# Deployment Runbook — Model Management API (011-model-mgmt-api) + +## Artefacts shipped + +- **`services/cubejs`**: 6 new routes, 10 new utilities, refactored `metaAll.js`. Image: `quicklookup/synmetrix-cube`. +- **`services/actions`**: 3 new RPC handlers (`auditDataschemaDelete`, `auditVersionRollback`, `auditLogsRetention`). Image: `quicklookup/synmetrix-actions`. +- **`services/hasura`**: one new migration (`1713600000000_dataschemas_delete_permission`) + metadata changes (delete_permissions, event triggers, audit_logs table, cron trigger). Image: `quicklookup/synmetrix-hasura-migrations`. + +## Order of operations + +Use the Kustomize overlay flow in the `cxs` repo. **Deploy Hasura migrations image + metadata apply before the cube/actions rollouts** — routes assume `audit_logs` exists and `versions.origin` / `versions.is_current` columns are present. + +1. Merge this branch to `main` in `synmetrix`. CI builds the three images under tags `{short-sha}`, `{branch}-{short-sha}`, `{branch}`, `latest`. +2. In `cxs` repo, bump `newTag` for all three images in `data/synmetrix/overlays/staging/kustomization.yaml`. Apply to staging first. Watch the `hasura-migrations` job log for the backfill line count. +3. Smoke-check staging (see [Post-deploy checks](#post-deploy-checks)). +4. Promote to production by bumping the same tags in `data/synmetrix/overlays/production/kustomization.yaml`. + +## Pre-deploy risk checks + +Run against the **target** database before bumping tags: + +```sql +-- A. How much data will the backfill touch? +SELECT count(*) AS total_versions, + count(DISTINCT branch_id) AS branches, + max(ct) AS max_per_branch + FROM (SELECT branch_id, count(*) ct FROM versions GROUP BY branch_id) q; + +-- B. 
Storage budget for audit_logs: recent version churn as a growth proxy (runs pre-migration, so it must not reference the new `origin` column)
+SELECT count(*) AS versions_last_90d
+FROM versions
+WHERE created_at > now() - interval '90 days';
+```
+
+If `total_versions > 100 000`: run the backfill in a low-traffic window. The DO-block in `up.sql` already batches at 1 000 rows, but the whole thing still takes one long migration and metadata is applied at the end.
+
+## Migration details
+
+The migration (`services/hasura/migrations/1713600000000_dataschemas_delete_permission/up.sql`):
+
+- Adds `versions.origin text NOT NULL DEFAULT 'user'` with CHECK `('user','smart_gen','rollback')`.
+- Adds `versions.is_current boolean NOT NULL DEFAULT true`.
+- Backfills `is_current = false` on older versions **in batches of 1 000**.
+- Installs a **statement-level** `versions_flip_is_current_trg` trigger using a NEW TABLE transition table, which handles bulk inserts correctly (row-level triggers would break the invariant on multi-row inserts — verified).
+- The trigger takes a transaction-scoped advisory lock summing the hashes of affected branches, so concurrent inserts on the same branch serialise; different branches still insert in parallel. **Verified under concurrent 10×2 bulk inserts on the same branch — invariant held.**
+- Creates `audit_logs` with three indexes.
+
+The corresponding metadata changes land in `services/hasura/metadata/tables.yaml` and `services/hasura/metadata/cron_triggers.yaml`.
+
+## Post-deploy checks
+
+Run after each environment's rollout.
+
+### 1. Migration applied + invariant holds
+
+```sql
+SELECT branch_id, count(*) FILTER (WHERE is_current) AS c
+  FROM versions GROUP BY branch_id
+  HAVING count(*) FILTER (WHERE is_current) <> 1;
+-- expect 0 rows
+```
+
+### 2.
Event triggers reachable
+
+Trigger a delete and confirm the audit row appears within a few seconds:
+
+```bash
+# Where $DS_ID owns $TARGET via an owner/admin caller
+curl -X DELETE -H "Authorization: Bearer $TOKEN" \
+  https://$HOST/api/v1/dataschema/$TARGET
+# Expect 200 with {deleted: true, dataschemaId: $TARGET}
+
+# Wait 2 s then:
+```
+```sql
+SELECT * FROM audit_logs
+  WHERE action = 'dataschema_delete'
+    AND target_id = '<the $TARGET uuid used in the curl above>'
+  ORDER BY created_at DESC LIMIT 1;
+-- outcome = success, error_code = NULL, created_at within the last minute
+```
+
+### 3. Refresh + meta-all latency baseline
+
+```bash
+curl -s -w "refresh: %{time_total}s\n" -o /dev/null -X POST \
+  -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" \
+  -d "{\"branchId\":\"$BRANCH\"}" \
+  https://$HOST/api/v1/internal/refresh-compiler
+
+curl -s -w "meta-all: %{time_total}s (bytes %{size_download})\n" -o /dev/null \
+  -H "Authorization: Bearer $TOKEN" https://$HOST/api/v1/meta-all
+```
+
+On the local dev stack (15 datasources, 17 cubes total) `/meta-all` averages ~40 ms warm. A production tenant will be higher — flag if it exceeds 2 s.
+
+### 4. Client-v2 regression
+
+Exercise at least:
+- `/v1/graphql` proxy (any GraphQL query the frontend runs — `users(limit:1)` is enough).
+- Model editor load → save → re-open (exercises `versions.insert` → new trigger → audit trigger for origin='rollback' case is skipped, generateDataSchema sets origin='user').
+- Data source switch → catalog load (`/api/v1/meta-all`).
+
+Expect identical UI behaviour. The two new cube-envelope fields (`dataschema_id`, `file_name`) are additive; existing code ignores unknown fields.
+
+## Rollback
+
+```bash
+./cli.sh hasura cli "migrate apply --version 1713600000000 --type down"
+./cli.sh hasura cli "metadata apply"   # after reverting metadata files
+```
+
+`down.sql` drops, in order: `audit_logs` indexes and table (data loss), the trigger + function, the `is_current` and `origin` columns.
+ +Rolling back the Hasura migration **only** (leaving the cube/actions image rolled forward) will cause `delete`, `rollback`, `validate-in-branch replace/preview-delete` to fail with 503/hasura_unavailable — the handlers assume the new columns exist. Roll cube + actions back in the same sweep. + +## Deferred items (not blocking deploy) + +- Tychi skill doc in `cxs-agents` repo (T013i + T048) — separate PR. +- `rollback_source_columns_missing` check in `versionRollback.js` — driver round-trip; Hasura errors surface via `hasura_rejected` in the interim. +- StepCI `model-management/` folder is not yet wired into `tests/stepci/workflow.yml`'s `include:` list — operator runs it standalone for now. + +## Risk summary + +| Risk | Severity | Mitigated by | +|---|---|---| +| Long backfill on large `versions` table | Medium | Batched at 1 000 rows; staging rehearsal recommended for >100k | +| Trigger write amplification per version insert | Low | Statement-level, one UPDATE per affected branch, indexed; measured O(branch size) | +| Concurrent-insert race on `is_current` | Resolved | Advisory lock keyed on affected-branch hash sum; stress-tested 2×10 concurrent bulk inserts | +| Multi-row INSERT breaks invariant | Resolved | Statement-level trigger with transition table | +| `audit_logs` unbounded growth | Low | Daily cron enforces 90-day retention (`audit_logs_retention` RPC) | +| Actions RPC handlers fail to load | Low | Tests load all three with HASURA unreachable; handlers return `{ok:false}` rather than crash | +| `/meta-all` extra CPU per call (per-schema YAML parse) | Low | Local baseline 36–48 ms warm; watch `p95` on staging before promoting | +| `dataschema_id` / `file_name` on cube summaries | Low | Purely additive; unit-tested; no downstream consumer breaks | diff --git a/specs/011-model-mgmt-api/checklists/requirements.md b/specs/011-model-mgmt-api/checklists/requirements.md new file mode 100644 index 00000000..2a427769 --- /dev/null +++ 
b/specs/011-model-mgmt-api/checklists/requirements.md @@ -0,0 +1,37 @@ +# Specification Quality Checklist: Model Management API + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-04-20 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [~] No implementation details (languages, frameworks, APIs) — **partial**: the spec references existing platform concepts (dataschemas, versions, branches, compiler cache, `findUser` ordering semantic) by name because the feature is an extension of an existing system. Implementation file paths have been moved out of the spec into `plan.md` and `tasks.md`. This trade-off is noted in the Context section. +- [x] Focused on user value and business needs +- [~] Written for non-technical stakeholders — **partial**: stakeholders familiar with the existing Synmetrix platform can read the spec directly; a pure business reader would need the Context section as a primer. +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- Items marked incomplete require spec updates before `/speckit.clarify` or `/speckit.plan`. +- Items marked `[~]` (partial) are deliberate trade-offs, not gaps: the spec extends an existing platform and references its named concepts by necessity. 
Implementation file paths have been moved out of the spec into `plan.md` and `tasks.md`. A pure product/engineering split would require an upstream document describing the platform itself; that is out of scope for this feature. +- Five user stories (three P1, two P2) each map cleanly to an independently testable capability; any single P1 story delivers a usable increment to the Tychi agent workflow. +- Eight measurable success criteria cover functional correctness, latency, refactor safety, payload efficiency, auditability, and authorisation. diff --git a/specs/011-model-mgmt-api/contracts/delete-dataschema.yaml b/specs/011-model-mgmt-api/contracts/delete-dataschema.yaml new file mode 100644 index 00000000..dbc58199 --- /dev/null +++ b/specs/011-model-mgmt-api/contracts/delete-dataschema.yaml @@ -0,0 +1,129 @@ +openapi: 3.1.0 +info: + title: Synmetrix Model Management — Dataschema Deletion + version: "011.2" +paths: + /api/v1/dataschema/{dataschemaId}: + delete: + operationId: deleteDataschema + summary: Delete a dataschema from the latest version of the active branch. + description: | + Removes the dataschema row via Hasura (`delete_dataschemas_by_pk`). Blocked if: + - The caller lacks owner/admin role on the datasource team (FR-006) → 403. + - The dataschema belongs to a version that is NOT the latest version of the + active branch (FR-007) → 409. + - Any other cube on the same branch references the target cube (FR-008) → 409. + The seven reference kinds detected are enumerated in FR-008 and in the + BlockingReference enum below. + + AUTH: dataschema-scoped endpoint. Direct-verify; no `x-hasura-datasource-id` + header required or accepted. The handler resolves dataschema → version → + branch → datasource server-side from the path parameter. + security: + - BearerAuth: [] + parameters: + - in: path + name: dataschemaId + required: true + schema: + type: string + format: uuid + responses: + "200": + description: Dataschema deleted. 
+ content: + application/json: + schema: { $ref: '#/components/schemas/DeleteDataschemaResponse' } + "403": + description: | + Authorization failure. Possible codes: + - `delete_blocked_authorization` — caller lacks owner/admin role on the datasource team. + Missing or malformed JWT also returns 403 with a non-business-specific code. + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + "404": + description: Dataschema not found or not visible to caller. + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + "409": + description: | + Delete blocked by a structural constraint. Possible codes: + - `delete_blocked_by_references` — cross-cube references still depend on the target (FR-008). + - `delete_blocked_historical_version` — the dataschema is not on the latest version of the active branch (FR-007). + content: + application/json: + schema: { $ref: '#/components/schemas/DeleteBlockedResponse' } + +components: + securitySchemes: + BearerAuth: + type: http + scheme: bearer + bearerFormat: JWT + schemas: + ErrorCode: + type: string + description: | + Canonical Synmetrix Model-Management API error code (FR-017). + Single source of truth: services/cubejs/src/utils/errorCodes.js. 
+ enum: + - validate_invalid_mode + - validate_target_not_found + - validate_unresolved_reference + - refresh_branch_not_visible + - refresh_unauthorized + - delete_blocked_by_references + - delete_blocked_historical_version + - delete_blocked_authorization + - cube_not_found + - diff_cross_branch + - diff_invalid_request + - rollback_version_not_on_branch + - rollback_blocked_authorization + - rollback_invalid_request + - rollback_source_columns_missing + ErrorResponse: + type: object + required: [code, message] + properties: + code: { $ref: '#/components/schemas/ErrorCode' } + message: { type: string } + DeleteDataschemaResponse: + type: object + required: [deleted, dataschemaId] + properties: + deleted: { type: boolean } + dataschemaId: { type: string, format: uuid } + DeleteBlockedResponse: + type: object + required: [code, message] + properties: + code: + type: string + description: Subset of ErrorCode applicable to 409 outcomes. + enum: + - delete_blocked_by_references + - delete_blocked_historical_version + message: { type: string } + blockingReferences: + type: array + description: Present when code=delete_blocked_by_references. 
+ items: + type: object + required: [referringCube, file, referenceKind] + properties: + referringCube: { type: string } + file: { type: string } + referenceKind: + type: string + enum: + - joins + - extends + - sub_query + - formula + - segment + - pre_aggregation + - filter_params + line: { type: [integer, "null"] } diff --git a/specs/011-model-mgmt-api/contracts/meta-single-cube.yaml b/specs/011-model-mgmt-api/contracts/meta-single-cube.yaml new file mode 100644 index 00000000..496436f4 --- /dev/null +++ b/specs/011-model-mgmt-api/contracts/meta-single-cube.yaml @@ -0,0 +1,111 @@ +openapi: 3.1.0 +info: + title: Synmetrix Model Management — Single-Cube Metadata + version: "011.2" +paths: + /api/v1/meta/cube/{cubeName}: + get: + operationId: getSingleCubeMeta + summary: Fetch compiled metadata for a single cube on a specific branch. + description: | + Returns only the named cube's compiled metadata envelope. Uses the same visibility + and access-list filtering as the aggregate catalog endpoint (FR-010). + 404 if the cube does not exist on the branch (FR-009) — never an empty list. + Always reads the **latest version** of the requested branch. Historical-version + reads are out of scope for this feature (see US4 acceptance criteria). + + AUTH: datasource-scoped endpoint. Uses the existing `checkAuthMiddleware`; + `x-hasura-datasource-id` is mandatory by contract. + security: + - BearerAuth: [] + parameters: + - in: path + name: cubeName + required: true + schema: + type: string + - in: header + name: x-hasura-datasource-id + required: true + schema: + type: string + format: uuid + - in: header + name: x-hasura-branch-id + required: false + schema: + type: string + format: uuid + description: Optional; defaults to the datasource's active branch. The latest version of whichever branch is resolved is always used. + responses: + "200": + description: Cube metadata envelope. 
+ content: + application/json: + schema: { $ref: '#/components/schemas/SingleCubeMeta' } + "403": + description: Missing/invalid auth or insufficient visibility. + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + "404": + description: Cube not found on the branch (FR-009). Returns `cube_not_found`. + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + +components: + securitySchemes: + BearerAuth: + type: http + scheme: bearer + bearerFormat: JWT + schemas: + ErrorCode: + type: string + description: | + Canonical Synmetrix Model-Management API error code (FR-017). + Single source of truth: services/cubejs/src/utils/errorCodes.js. + enum: + - validate_invalid_mode + - validate_target_not_found + - validate_unresolved_reference + - refresh_branch_not_visible + - refresh_unauthorized + - delete_blocked_by_references + - delete_blocked_historical_version + - delete_blocked_authorization + - cube_not_found + - diff_cross_branch + - diff_invalid_request + - rollback_version_not_on_branch + - rollback_blocked_authorization + - rollback_invalid_request + - rollback_source_columns_missing + ErrorResponse: + type: object + required: [code, message] + properties: + code: { $ref: '#/components/schemas/ErrorCode' } + message: { type: string } + SingleCubeMeta: + type: object + required: [cube, datasourceId, branchId, versionId] + properties: + cube: + type: object + required: [name, measures, dimensions, segments] + properties: + name: { type: string } + title: { type: [string, "null"] } + description: { type: [string, "null"] } + public: { type: boolean } + measures: { type: array, items: { type: object } } + dimensions: { type: array, items: { type: object } } + segments: { type: array, items: { type: object } } + hierarchies: { type: array, items: { type: object } } + meta: + type: [object, "null"] + datasourceId: { type: string, format: uuid } + branchId: { type: string, format: uuid } + versionId: { 
type: string, format: uuid } diff --git a/specs/011-model-mgmt-api/contracts/refresh-compiler.yaml b/specs/011-model-mgmt-api/contracts/refresh-compiler.yaml new file mode 100644 index 00000000..93377fc0 --- /dev/null +++ b/specs/011-model-mgmt-api/contracts/refresh-compiler.yaml @@ -0,0 +1,105 @@ +openapi: 3.1.0 +info: + title: Synmetrix Model Management — Compiler Cache Refresh + version: "011.2" +paths: + /api/v1/internal/refresh-compiler: + post: + operationId: refreshCompiler + summary: Asynchronously invalidate the compiled-model cache for a branch. + description: | + Evicts LRU entries whose appId suffix matches the branch's current schemaVersion. + Returns immediately after eviction; recompilation happens on the next metadata/query + request and any compile error surfaces there (FR-004a). + Does NOT touch the pre-aggregation cache or user-scope caches (FR-004, Clarification Q1). + Idempotent per (branch, schemaVersion) pair (FR-005) — two refreshes against the + same schemaVersion do the same work; a schemaVersion change between calls is a + different logical operation. + + AUTH: branch-scoped endpoint. Direct-verify; no `x-hasura-datasource-id` header + required or accepted. The handler resolves the datasource server-side from the + branch id in the body. **Requires owner or admin role** on the target datasource's + team (FR-015) — refresh affects the compiled view that every other user of the + branch sees, so it is gated at the same bar as delete and rollback. + security: + - BearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RefreshCompilerRequest' + responses: + "200": + description: Eviction complete. + content: + application/json: + schema: { $ref: '#/components/schemas/RefreshCompilerResponse' } + "400": + description: Malformed request. 
+ content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + "403": + description: Missing/invalid auth, or caller lacks owner/admin role on the datasource team. + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + "404": + description: Branch not found or not visible to the caller. + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + +components: + securitySchemes: + BearerAuth: + type: http + scheme: bearer + bearerFormat: JWT + schemas: + ErrorCode: + type: string + description: | + Canonical Synmetrix Model-Management API error code (FR-017). + Single source of truth: services/cubejs/src/utils/errorCodes.js. + enum: + - validate_invalid_mode + - validate_target_not_found + - validate_unresolved_reference + - refresh_branch_not_visible + - refresh_unauthorized + - delete_blocked_by_references + - delete_blocked_historical_version + - delete_blocked_authorization + - cube_not_found + - diff_cross_branch + - diff_invalid_request + - rollback_version_not_on_branch + - rollback_blocked_authorization + - rollback_invalid_request + - rollback_source_columns_missing + ErrorResponse: + type: object + required: [code, message] + properties: + code: { $ref: '#/components/schemas/ErrorCode' } + message: { type: string } + RefreshCompilerRequest: + type: object + required: [branchId] + properties: + branchId: + type: string + format: uuid + RefreshCompilerResponse: + type: object + required: [evicted, schemaVersion] + properties: + evicted: + type: integer + minimum: 0 + description: Count of LRU entries removed (may be 0 if the branch had never been compiled). + schemaVersion: + type: string + description: The md5 hash whose cache entries were targeted. 
diff --git a/specs/011-model-mgmt-api/contracts/validate-in-branch.yaml b/specs/011-model-mgmt-api/contracts/validate-in-branch.yaml new file mode 100644 index 00000000..a32e4f2f --- /dev/null +++ b/specs/011-model-mgmt-api/contracts/validate-in-branch.yaml @@ -0,0 +1,156 @@ +openapi: 3.1.0 +info: + title: Synmetrix Model Management — Contextual Validation + version: "011.2" +paths: + /api/v1/validate-in-branch: + post: + operationId: validateInBranch + summary: Validate a draft model file against a branch's deployed cubes + description: | + Compiles the target branch's existing dataschemas together with the submitted draft + and returns a structured compile report. Modes: append (new file), replace (overwrite + an existing dataschema's code), preview-delete (simulate removal of a dataschema). + Returns 200 for valid OR structurally compiled drafts (the `valid` flag distinguishes). + Returns 4xx only for malformed requests or auth failures. + + AUTH: branch-scoped endpoint. Uses direct-verify (mirrors /api/v1/meta-all), NOT + the datasource-scoped checkAuthMiddleware. No `x-hasura-datasource-id` header is + required or accepted — the branch id in the body is the sole scoping key. + Mode `append` accepts team members; modes `replace` and `preview-delete` require + owner or admin role on the datasource team (FR-015). + security: + - BearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ValidateInBranchRequest' + responses: + "200": + description: Compile report produced. + content: + application/json: + schema: + $ref: '#/components/schemas/CompileReport' + "400": + description: Malformed request (bad mode/field combination, non-uuid branchId, missing draft/target). + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + "403": + description: Missing/invalid auth, or caller lacks owner/admin role for a mutating mode. 
+ content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + "404": + description: Branch or target dataschema not found / not visible to caller. + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + +components: + securitySchemes: + BearerAuth: + type: http + scheme: bearer + bearerFormat: JWT + schemas: + ErrorCode: + type: string + description: | + Canonical Synmetrix Model-Management API error code (FR-017). + Single source of truth: services/cubejs/src/utils/errorCodes.js. + enum: + - validate_invalid_mode + - validate_target_not_found + - validate_unresolved_reference + - refresh_branch_not_visible + - refresh_unauthorized + - delete_blocked_by_references + - delete_blocked_historical_version + - delete_blocked_authorization + - cube_not_found + - diff_cross_branch + - diff_invalid_request + - rollback_version_not_on_branch + - rollback_blocked_authorization + - rollback_invalid_request + - rollback_source_columns_missing + ErrorResponse: + type: object + required: [code, message] + properties: + code: { $ref: '#/components/schemas/ErrorCode' } + message: { type: string } + ValidateInBranchRequest: + type: object + required: [branchId, mode] + properties: + branchId: + type: string + format: uuid + mode: + type: string + enum: [append, replace, preview-delete] + draft: + $ref: '#/components/schemas/DraftFile' + targetDataschemaId: + type: string + format: uuid + allOf: + - description: > + Mode-conditional fields: + - append: draft REQUIRED, targetDataschemaId FORBIDDEN + - replace: draft REQUIRED, targetDataschemaId REQUIRED + - preview-delete: draft FORBIDDEN, targetDataschemaId REQUIRED + DraftFile: + type: object + required: [fileName, content] + properties: + fileName: + type: string + pattern: '^[A-Za-z0-9_\-.]+\.(yml|yaml|js)$' + content: + type: string + maxLength: 1048576 + CompileReport: + type: object + required: [valid, errors, warnings] + properties: + valid: + type: boolean + 
errors: + type: array + items: { $ref: '#/components/schemas/CompileDiagnostic' } + warnings: + type: array + items: { $ref: '#/components/schemas/CompileDiagnostic' } + blockingReferences: + type: array + description: Present only when mode=preview-delete and valid=false. + items: { $ref: '#/components/schemas/BlockingReference' } + CompileDiagnostic: + type: object + required: [severity, message] + properties: + severity: { type: string, enum: [error, warning] } + message: { type: string } + fileName: { type: [string, "null"] } + startLine: { type: [integer, "null"] } + startColumn: { type: [integer, "null"] } + endLine: { type: [integer, "null"] } + endColumn: { type: [integer, "null"] } + code: + $ref: '#/components/schemas/ErrorCode' + BlockingReference: + type: object + required: [referringCube, file, referenceKind] + properties: + referringCube: { type: string } + file: { type: string } + referenceKind: + type: string + enum: [joins, extends, sub_query, formula, segment, pre_aggregation, filter_params] + line: { type: [integer, "null"] } diff --git a/specs/011-model-mgmt-api/contracts/version-diff.yaml b/specs/011-model-mgmt-api/contracts/version-diff.yaml new file mode 100644 index 00000000..9b293d76 --- /dev/null +++ b/specs/011-model-mgmt-api/contracts/version-diff.yaml @@ -0,0 +1,130 @@ +openapi: 3.1.0 +info: + title: Synmetrix Model Management — Version Diff + version: "011.2" +paths: + /api/v1/version/diff: + post: + operationId: versionDiff + summary: Structured diff between two versions on the same branch. + description: | + Returns added/removed/modified cubes with per-field-level changes (measures, + dimensions, segments, meta). Rejects cross-branch pairs with `diff_cross_branch` + (FR-012). + + AUTH: version-scoped endpoint. Direct-verify; no `x-hasura-datasource-id` + header required or accepted. The handler resolves branch and datasource + server-side from each version id in the body. Requires team membership. 
+ security: + - BearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: { $ref: '#/components/schemas/VersionDiffRequest' } + responses: + "200": + description: Diff produced. + content: + application/json: + schema: { $ref: '#/components/schemas/VersionDiffResponse' } + "400": + description: | + Malformed request. Possible codes: `diff_cross_branch`, `diff_invalid_request`. + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + "403": + description: Missing/invalid auth or insufficient visibility. + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + "404": + description: One of the versions not found or not visible. + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + +components: + securitySchemes: + BearerAuth: + type: http + scheme: bearer + bearerFormat: JWT + schemas: + ErrorCode: + type: string + description: | + Canonical Synmetrix Model-Management API error code (FR-017). + Single source of truth: services/cubejs/src/utils/errorCodes.js. 
+ enum: + - validate_invalid_mode + - validate_target_not_found + - validate_unresolved_reference + - refresh_branch_not_visible + - refresh_unauthorized + - delete_blocked_by_references + - delete_blocked_historical_version + - delete_blocked_authorization + - cube_not_found + - diff_cross_branch + - diff_invalid_request + - rollback_version_not_on_branch + - rollback_blocked_authorization + - rollback_invalid_request + - rollback_source_columns_missing + ErrorResponse: + type: object + required: [code, message] + properties: + code: { $ref: '#/components/schemas/ErrorCode' } + message: { type: string } + VersionDiffRequest: + type: object + required: [fromVersionId, toVersionId] + properties: + fromVersionId: { type: string, format: uuid } + toVersionId: { type: string, format: uuid } + VersionDiffResponse: + type: object + required: [branchId, fromVersionId, toVersionId, addedCubes, removedCubes, modifiedCubes] + properties: + branchId: { type: string, format: uuid } + fromVersionId: { type: string, format: uuid } + toVersionId: { type: string, format: uuid } + addedCubes: + type: array + items: + type: object + required: [cubeName, file] + properties: + cubeName: { type: string } + file: { type: string } + removedCubes: + type: array + items: + type: object + required: [cubeName, file] + properties: + cubeName: { type: string } + file: { type: string } + modifiedCubes: + type: array + items: + type: object + required: [cubeName, file, changes] + properties: + cubeName: { type: string } + file: { type: string } + changes: + type: array + items: + type: object + required: [field, added, removed, modified] + properties: + field: + type: string + enum: [measures, dimensions, segments, meta] + added: { type: array, items: { type: string } } + removed: { type: array, items: { type: string } } + modified: { type: array, items: { type: string } } diff --git a/specs/011-model-mgmt-api/contracts/version-rollback.yaml 
b/specs/011-model-mgmt-api/contracts/version-rollback.yaml new file mode 100644 index 00000000..b0538594 --- /dev/null +++ b/specs/011-model-mgmt-api/contracts/version-rollback.yaml @@ -0,0 +1,100 @@ +openapi: 3.1.0 +info: + title: Synmetrix Model Management — Version Rollback + version: "011.2" +paths: + /api/v1/version/rollback: + post: + operationId: rollbackVersion + summary: Clone a prior version's dataschemas into a new active version. + description: | + Creates a new `versions` row on the specified branch whose dataschemas are + byte-identical clones of the target version's dataschemas. Does NOT touch + explorations, alerts, or any other version-bound records (FR-013a, + Clarification Q3). Version history is preserved (FR-014). + The new version's `origin` column is set to `rollback` for audit. + + AUTH: branch-scoped endpoint. Direct-verify; no `x-hasura-datasource-id` + header required or accepted. The handler resolves the datasource server-side + from the branch id in the body. Requires owner or admin role on the + datasource team (FR-015). + security: + - BearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: { $ref: '#/components/schemas/RollbackRequest' } + responses: + "200": + description: Rollback complete. + content: + application/json: + schema: { $ref: '#/components/schemas/RollbackResponse' } + "400": + description: | + Malformed request or rollback precondition failed. Possible codes: + `rollback_version_not_on_branch`, `rollback_invalid_request`, + `rollback_source_columns_missing`. + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + "403": + description: Missing/invalid auth or caller lacks owner/admin role on the datasource team. + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + "404": + description: Branch or version not found / not visible. 
+ content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } + +components: + securitySchemes: + BearerAuth: + type: http + scheme: bearer + bearerFormat: JWT + schemas: + ErrorCode: + type: string + description: | + Canonical Synmetrix Model-Management API error code (FR-017). + Single source of truth: services/cubejs/src/utils/errorCodes.js. + enum: + - validate_invalid_mode + - validate_target_not_found + - validate_unresolved_reference + - refresh_branch_not_visible + - refresh_unauthorized + - delete_blocked_by_references + - delete_blocked_historical_version + - delete_blocked_authorization + - cube_not_found + - diff_cross_branch + - diff_invalid_request + - rollback_version_not_on_branch + - rollback_blocked_authorization + - rollback_invalid_request + - rollback_source_columns_missing + ErrorResponse: + type: object + required: [code, message] + properties: + code: { $ref: '#/components/schemas/ErrorCode' } + message: { type: string } + RollbackRequest: + type: object + required: [branchId, toVersionId] + properties: + branchId: { type: string, format: uuid } + toVersionId: { type: string, format: uuid } + RollbackResponse: + type: object + required: [newVersionId, clonedDataschemaCount] + properties: + newVersionId: { type: string, format: uuid } + clonedDataschemaCount: + type: integer + minimum: 0 diff --git a/specs/011-model-mgmt-api/data-model.md b/specs/011-model-mgmt-api/data-model.md new file mode 100644 index 00000000..28213548 --- /dev/null +++ b/specs/011-model-mgmt-api/data-model.md @@ -0,0 +1,382 @@ +# Phase 1 Data Model — Model Management API + +Date: 2026-04-20 +Branch: `011-model-mgmt-api` + +This feature introduces **no new persistent tables**. It refines the permissions on an existing table (`dataschemas`), adds one nullable column (`versions.origin`), and defines several transient in-memory entities used only inside request/response envelopes. + +--- + +## 1. 
Persistent entities (database) + +### 1.1 `dataschemas` (existing table — modified permissions only) + +| Field | Type | Existing? | Notes | +|---|---|---|---| +| `id` | uuid (PK) | existing | | +| `name` | text | existing | File name (`orders.yml`) | +| `code` | text | existing | Source code (YAML or JS) | +| `checksum` | text | existing | md5 of `code` | +| `datasource_id` | uuid | existing | FK `datasources.id` | +| `version_id` | uuid | existing | FK `versions.id` | +| `user_id` | uuid | existing | Authoring user | +| `created_at` / `updated_at` | timestamptz | existing | | + +**Permission change** (new delete permission, role `user`): + +```yaml +delete_permissions: + - role: user + permission: + filter: + _and: + - datasource: + team: + members: + _and: + - member_roles: + team_role: + _in: [owner, admin] + - user_id: + _eq: X-Hasura-User-Id + - version: + _and: + - is_current: + _eq: true + - branch: + status: + _eq: active +``` + +Validation rules enforced by this filter: + +- Only owners and admins of the team that owns the datasource may delete. +- Only dataschemas attached to the **current version of the active branch** may be deleted (FR-007 — historical versions remain immutable; the `is_current` predicate is what excludes them, see §1.2 and §5). + +### 1.2 `versions` (existing table — two new columns + one trigger) + +| Field | Type | Existing? | Notes | +|---|---|---|---| +| `id` | uuid (PK) | existing | | +| `branch_id` | uuid | existing | FK `branches.id` | +| `user_id` | uuid | existing | | +| `checksum` | text | existing | | +| `markdown_doc` | text | existing | | +| `origin` | text | **NEW**, nullable, default `'user'` | CHECK (`origin IN ('user','smart_gen','rollback')`). Distinguishes rollback-origin versions for audit. | +| `is_current` | boolean | **NEW**, `NOT NULL DEFAULT true` | True for exactly one version per branch — the newest. Maintained by the `versions_flip_is_current_trg` AFTER-INSERT trigger: when a new version is inserted, the trigger flips the previous current row on the same branch to `false`.
Backfilled in the migration so the current newest row per branch starts with `is_current = true` and all others start with `false`. Used by the `dataschemas.delete_permissions` filter to enforce version-level immutability (FR-007). | +| `created_at` / `updated_at` | timestamptz | existing | | + +Migration file: `services/hasura/migrations/1713600000000_dataschemas_delete_permission/up.sql`. Down migration reverts the delete permission, drops the trigger and function, and drops the `origin` and `is_current` columns. + +### 1.3 `branches` (existing — no schema change; vocabulary clarification only) + +| Field | Type | Notes | +|---|---|---| +| `id` | uuid (PK) | | +| `name` | text | | +| `status` | enum (`branch_statuses`) | **Actual enum values are `active`, `created`, `archived`** — per migrations `1680871325606_insert_into_public_branch_statuses` / `1680871332169_…` / `1680871339548_…`. One branch per datasource carries `active`; all others are "non-active" (historical) for the purposes of FR-007. | +| `datasource_id` | uuid | FK `datasources.id` | +| `user_id` | uuid | | + +No schema change. References in `delete_permissions` filter on `status._eq: active` and therefore work regardless of the non-active value name. + +### 1.4 `datasources`, `members`, `member_roles`, `teams` + +No changes. + +### 1.5 `audit_logs` (NEW table — introduced by this feature) + +| Field | Type | Notes | +|---|---|---| +| `id` | uuid (PK) | `DEFAULT gen_random_uuid()` | +| `action` | text | CHECK (`action IN ('dataschema_delete', 'version_rollback')`) | +| `user_id` | uuid | FK `users.id`, `ON DELETE CASCADE` | +| `datasource_id` | uuid (nullable) | FK `datasources.id`, `ON DELETE SET NULL` | +| `branch_id` | uuid (nullable) | FK `branches.id`, `ON DELETE SET NULL` | +| `target_id` | uuid | The deleted dataschema id or the rolled-back-from version id. 
| +| `outcome` | text | CHECK (`outcome IN ('success', 'failure')`) | +| `error_code` | text (nullable) | Stable FR-017 code when `outcome = 'failure'`. | +| `payload` | jsonb (nullable) | Operation-specific detail (e.g. cloned dataschema count for rollback; blocking references for a failed delete). | +| `created_at` | timestamptz | `DEFAULT now()` | + +**Indexes**: `created_at DESC`, `user_id`, `action`. + +**Hasura permissions**: **admin role only** for select/insert/update/delete. Role `user` has **no** permission — agents cannot read or tamper with the audit log. + +**Retention**: 90 days, enforced by a daily Hasura cron trigger that runs `DELETE FROM audit_logs WHERE created_at < now() - interval '90 days'`. + +**Writers**: Hasura event triggers `delete_dataschema_audit` and `version_rollback_audit` post to Actions RPC handlers that perform the INSERT using the admin secret. + +--- + +## 2. Transient entities (request/response bodies) + +These are not persisted; they exist only in handler memory and on the wire. Declared here to anchor the OpenAPI contracts in `contracts/` and the StepCI fixture structure. + +### 2.1 `DraftFile` + +Represents a candidate dataschema file submitted for contextual validation. + +```ts +interface DraftFile { + fileName: string; // "orders.yml" | "orders.js" + content: string; // raw YAML or JS source +} +``` + +Validation: + +- `fileName` must match `/^[A-Za-z0-9_\-.]+\.(yml|yaml|js)$/`. +- `content` must be non-empty and ≤ 1 MiB. + +### 2.2 `ValidateInBranchRequest` + +```ts +interface ValidateInBranchRequest { + branchId: string; // uuid + mode: "append" | "replace" | "preview-delete"; + draft?: DraftFile; // required when mode is "append" or "replace" + targetDataschemaId?: string; // uuid — required when mode is "replace" or "preview-delete" +} +``` + +Validation: + +- `mode === "append"` → `draft` required, `targetDataschemaId` must be absent. +- `mode === "replace"` → both `draft` and `targetDataschemaId` required. 
+- `mode === "preview-delete"` → `targetDataschemaId` required, `draft` must be absent. + +### 2.3 `CompileReport` + +```ts +interface CompileDiagnostic { + severity: "error" | "warning"; + message: string; + fileName: string | null; + startLine: number | null; + startColumn: number | null; + endLine: number | null; + endColumn: number | null; + code?: string; // stable error code (FR-017) +} + +interface CompileReport { + valid: boolean; + errors: CompileDiagnostic[]; + warnings: CompileDiagnostic[]; + blockingReferences?: BlockingReference[]; // present only for preview-delete when valid=false +} +``` + +### 2.4 `BlockingReference` + +Emitted by `preview-delete` and `DELETE /api/v1/dataschema/:id`. + +```ts +interface BlockingReference { + referringCube: string; // cube identifier, e.g. "order_items" + file: string; // dataschema fileName + referenceKind: + | "joins" + | "extends" + | "sub_query" + | "formula" + | "segment" + | "pre_aggregation" + | "filter_params"; + line: number | null; // 1-based +} +``` + +### 2.5 `RefreshCompilerRequest` + +```ts +interface RefreshCompilerRequest { + branchId: string; // uuid +} +``` + +### 2.6 `RefreshCompilerResponse` + +```ts +interface RefreshCompilerResponse { + evicted: number; // count of LRU entries removed + schemaVersion: string; // the hash whose cache entries were targeted +} +``` + +### 2.7 `DeleteDataschemaResponse` + +```ts +interface DeleteDataschemaResponse { + deleted: boolean; + dataschemaId: string; +} +``` + +Error response (FR-008 blocking refs): + +```ts +interface DeleteBlockedResponse { + code: "delete_blocked_by_references"; + message: string; + blockingReferences: BlockingReference[]; +} +``` + +### 2.8 `SingleCubeMeta` + +```ts +interface SingleCubeMeta { + cube: { + name: string; + title: string | null; + description: string | null; + public: boolean; + measures: MeasureMeta[]; + dimensions: DimensionMeta[]; + segments: SegmentMeta[]; + hierarchies?: HierarchyMeta[]; + meta: Record | null; + }; + 
datasourceId: string; + branchId: string; + versionId: string; +} +``` + +(Shapes of `MeasureMeta`, `DimensionMeta`, `SegmentMeta`, `HierarchyMeta` mirror the envelopes Cube.js already returns from `/api/v1/meta` — no new shape is invented.) + +### 2.9 `VersionDiff` + +```ts +interface CubeFieldChange { + field: "measures" | "dimensions" | "segments" | "meta"; + added: string[]; // field names + removed: string[]; + modified: string[]; // field names whose `sql` or type changed +} + +interface CubeChange { + cubeName: string; + file: string; + changes: CubeFieldChange[]; +} + +interface VersionDiffResponse { + branchId: string; + fromVersionId: string; + toVersionId: string; + addedCubes: { cubeName: string; file: string }[]; + removedCubes: { cubeName: string; file: string }[]; + modifiedCubes: CubeChange[]; +} +``` + +### 2.10 `RollbackRequest` + +```ts +interface RollbackRequest { + branchId: string; // uuid + toVersionId: string; // uuid — must belong to branchId +} +``` + +### 2.11 `RollbackResponse` + +```ts +interface RollbackResponse { + newVersionId: string; // uuid of the freshly inserted version + clonedDataschemaCount: number; // sanity-check for the caller +} +``` + +--- + +## 3. 
State transitions + +### 3.1 Dataschema lifecycle + +``` + ┌──────────────┐ ┌──────────────┐ + │ not exist │ insert_versions_one │ attached │ + │ │───────────────────────▶ │ to version │ + └──────────────┘ └──────┬───────┘ + │ + │ update (code / name) + ▼ + ┌──────────────┐ + │ attached, │ + │ mutated │ + └──────┬───────┘ + │ delete_dataschemas_by_pk + │ (only if branch.status = active + │ and no blocking references) + ▼ + ┌──────────────┐ + │ removed │ + └──────────────┘ +``` + +### 3.2 Version lifecycle + +``` +┌──────────────┐ insert_versions_one ┌──────────────┐ +│ not exist │ ──────────────────────────────▶ │ active / │ +│ │ (origin in {user, smart_gen, │ historical │ +└──────────────┘ rollback}) │ (immutable) │ + └──────────────┘ +``` + +Versions never leave the "immutable" state. Rollback does not mutate existing versions; it inserts a new one with `origin = 'rollback'`. + +### 3.3 Compiler cache entry lifecycle + +``` +┌──────────────┐ first query on branch ┌────────────┐ +│ not cached │ ─────────────────────────▶ │ cached │ +│ │ (LRU.set on appId) │ │ +└──────────────┘ └─────┬──────┘ + ▲ │ + │ │ POST /api/v1/internal/refresh-compiler + │ │ OR LRU eviction (TTL/size) + └───────────────────────────────────────────┘ +``` + +No intermediate state; the cache is strictly in-memory. + +--- + +## 4. Relationships + +``` +datasources ─1─∞→ branches ─1─∞→ versions ─1─∞→ dataschemas + ▲ ▲ + └── team_id ──→ teams │ + ▲ │ + members ─── member_roles + +audit_logs ─── user_id ──→ users + ─── datasource_id ──→ datasources (nullable) + ─── branch_id ──→ branches (nullable) +``` + +Three new foreign keys: `audit_logs.user_id`, `audit_logs.datasource_id`, `audit_logs.branch_id`. + +--- + +## 5. Validation rules summary + +| Rule | Source | Enforced by | +|---|---|---| +| Only owners/admins of team can delete a dataschema. | FR-006 | Hasura `delete_permissions` filter + handler re-check | +| Only dataschemas on the **latest version of the active branch** can be deleted. 
| FR-007 | Hasura `delete_permissions` filter on `version.is_current._eq: true AND version.branch.status._eq: active` + handler re-check that returns `delete_blocked_historical_version` | +| Deletion blocked by cross-cube reference. | FR-008 | `utils/referenceScanner.js` in handler, before calling Hasura mutation | +| Owner/admin role required for every mutating operation (delete, rollback, refresh, validate-in-branch replace/preview-delete). | FR-015 | Handler checks `user.members[].member_roles.team_role` contains `owner` or `admin` for the target team | +| Diff rejects cross-branch pairs. | FR-012 | Handler pre-check (single GraphQL query of `branch_id` for both versions) | +| Rollback only clones dataschemas, not dependents. | FR-013a | Handler implementation (R5) | +| Refresh evicts only branch-scoped cache entries. | FR-004 | `utils/compilerCacheInvalidator.js` iterates and filters by `schemaVersion` suffix | +| Refresh is idempotent per (branch, schemaVersion) pair. | FR-005 | Second eviction of same `schemaVersion` finds nothing to evict | +| Single-cube metadata respects access-list visibility. | FR-010 | Handler reuses `apiGateway.filterVisibleItemsInMeta` | +| Audit records cover every outcome path (success + all failures). | FR-016, SC-007 | Hasura event trigger for success commits + in-handler `writeAuditLog` for every failure branch | + +--- + +All entities, shapes, transitions, and validations are now specified. Proceed to contracts. 
diff --git a/specs/011-model-mgmt-api/plan.md b/specs/011-model-mgmt-api/plan.md new file mode 100644 index 00000000..7e8f1cea --- /dev/null +++ b/specs/011-model-mgmt-api/plan.md @@ -0,0 +1,130 @@ +# Implementation Plan: Model Management API + +**Branch**: `011-model-mgmt-api` | **Date**: 2026-04-20 | **Spec**: [spec.md](./spec.md) +**Input**: Feature specification from `/specs/011-model-mgmt-api/spec.md` + +## Summary + +Five server-side capabilities that let an authenticated agent own the full semantic-model lifecycle without operator assistance: contextual validation of a draft against a branch's deployed cubes, asynchronous invalidation of the compiler cache scoped to a branch's dataschemas, deletion of a dataschema with blocking-reference detection, single-cube compiled-metadata lookup, and structured diff plus rollback between versions on the same branch. All endpoints ship on the existing CubeJS Express router; no new service containers or runtimes are introduced. New handler files land inside **existing** services only — `services/cubejs/src/routes/` (six routes) and `services/actions/src/rpc/` (two audit RPC handlers). Persistence for deletion uses a new Hasura `delete_permissions` block on the existing `dataschemas` table; rollback reuses `insert_versions_one`. Durable audit records for delete + rollback are persisted to a new `audit_logs` table added by this feature; refresh is cache-only and emits a non-durable structured log line only. 
+ ## Technical Context + +**Language/Version**: JavaScript (ES modules), Node.js 22.x (already current in cubejs service after 003-update-deps) +**Primary Dependencies**: `@cubejs-backend/schema-compiler` ^1.6.19 (existing; `prepareCompiler` powers validation), `@cubejs-backend/server-core` ^1.6.19 (existing; exposes `cubejs.compilerCache` LRU-cache), `@cubejs-backend/api-gateway` ^1.6.19 (existing; `getCompilerApi` + `filterVisibleItemsInMeta`), `jose` (existing; FraiOS/WorkOS JWT verification), Express 4.x (existing router). No new dependencies. +**Storage**: PostgreSQL via Hasura (existing `dataschemas`, `versions`, `branches` tables — one new Hasura delete-permission migration on `dataschemas`). In-memory LRU compiler cache inside the cubejs process (existing). One new table (`audit_logs`) for durable audit records; no other new tables. +**Testing**: StepCI workflow tests under `tests/stepci/workflows/model-management/` for end-to-end API contract coverage; Vitest unit tests co-located under `services/cubejs/src/routes/__tests__/` and `services/cubejs/src/utils/__tests__/`. Hasura migration tested via `./cli.sh hasura cli "migrate apply"` in a clean environment per constitution. +**Target Platform**: Linux container (existing cubejs service image `quicklookup/synmetrix-cube`), deployed by Kustomize overlay in the `cxs` repo. +**Project Type**: Web service (single backend service). +**Performance Goals**: Refresh endpoint responds in under 500 ms p95 (eviction is in-process LRU deletion). Contextual validation completes in under 10 s p95 for a branch of up to 50 cubes (bounded by `prepareCompiler` cold-compile time on one core). Diff endpoint responds in under 2 s p95 for versions of up to 50 cubes. Single-cube metadata inherits aggregate-meta latency; SC-005 targets payload reduction rather than wall-clock. +**Constraints**: Compiler-cache invalidation must not clear pre-aggregation cache or user-scope caches (FR-004, Clarification Q1). Rollback must clone only dataschemas (FR-013a, Clarification Q3).
Deletion must detect all seven cross-cube reference kinds enumerated in FR-008. Persistent mutating operations (delete, rollback) must emit a durable audit record (FR-016) using the existing Hasura event-trigger pattern already established on `versions.generate_dataschemas_docs`. Refresh is cache-only (FR-004) and emits a non-durable structured log line instead. +**Scale/Scope**: Tens of datasources per tenant, ≤50 cubes per branch, ≤1000 dataschemas across history per datasource; agent call volume is low dozens per minute per tenant. + +## Constitution Check + +*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.* + +### I. Service Isolation — PASS + +- All endpoints live in the existing `services/cubejs` process and router; no cross-service coupling is introduced. +- The single cross-service contract change is a new Hasura delete permission on `dataschemas`. That is a Hasura-local schema change, not a new service dependency. +- Deletion and rollback both use existing Hasura mutations (`delete_dataschemas_by_pk`, `insert_versions_one`) called via the in-process `fetchGraphQL` helper; the only new Actions-service code is the pair of audit-log RPC handlers invoked by Hasura event triggers — the delete and rollback request paths themselves call no new endpoint. + +### II. Multi-Tenancy First — PASS + +- Every endpoint flows through `checkAuth.js` (or an equivalent direct-verify path for routes that do not need a datasource header, exactly mirroring `metaAll.js` and `discover.js`). +- Deletion and rollback invoke `defineUserScope` for the target branch and refuse if the caller's team partition does not include the datasource (same pattern as `resolvePartitionTeamIds`). +- Refresh derives its branch-scoped `schemaVersion` from the caller's resolved scope and only deletes cache entries whose appId matches that `schemaVersion`. +- The compiler-cache invalidation primitive does not touch `buildSecurityContext`; it operates one level above (the LRU cache of compiled APIs keyed by the existing `CUBEJS_APP_{dataSourceVersion}_{schemaVersion}` appId). + +### III.
Test-Driven Development — PASS + +- Every route gets a StepCI workflow in `tests/stepci/workflows/model-management/` before the implementation file compiles. Unit tests co-located under `src/routes/__tests__/` for the validate, refresh, delete, meta-single, diff, and rollback handlers are written before their handlers. +- The Hasura delete-permission migration is verified by a StepCI test that attempts (and expects rejection of) a delete of a dataschema attached to a non-active-branch version. +- The cache-invalidation helper has unit tests that populate a fake LRU cache with known appIds and verify only the branch-scoped entries are removed. + +### IV. Security by Default — PASS + +- Authentication: every endpoint requires a valid FraiOS, WorkOS, or Hasura HS256 token; unauthenticated requests return 403 before any side effect. +- Authorisation: delete and rollback require owner or admin role on the target datasource's team (enforced by Hasura permissions already present on `dataschemas` and `versions`, plus a new `dataschemas.delete_permissions` block mirroring the existing insert/update policies). +- Visibility filtering: single-cube metadata reuses `apiGateway.filterVisibleItemsInMeta` exactly as `metaAll.js:71` already does. +- Audit: every mutating operation emits an event captured by Hasura's existing event-trigger infrastructure; no new secret, no new log sink. +- No new secret material is introduced. No new outbound network dependency. + +### V. Simplicity / YAGNI — PASS + +- No new dependency. No new service. No new cache store. One narrowly-scoped new table (`audit_logs`) exists solely to satisfy the durable-audit requirement (FR-016). +- All new logic composes existing primitives: `prepareCompiler` for validation, `cubejs.compilerCache.delete(appId)` for refresh, `findDataSchemasByIds` plus `diffModels` for diff, `findDataSchemas` plus `createDataSchema` for rollback. +- Validation request body intentionally mirrors the existing `POST /api/v1/validate` plus two new fields (`branchId`, `mode`). No new data-shape vocabulary.
+- Deletion is a single Hasura mutation wrapped with a cross-reference scan implemented by iterating the already-parsed cubes in memory; no dependency-graph cache. + +No constitutional violations. Complexity Tracking table is intentionally empty. + +## Project Structure + +### Documentation (this feature) + +```text +specs/011-model-mgmt-api/ +├── plan.md # This file +├── research.md # Phase 0 output +├── data-model.md # Phase 1 output +├── quickstart.md # Phase 1 output +├── contracts/ # Phase 1 output +│ ├── validate-in-branch.yaml +│ ├── refresh-compiler.yaml +│ ├── delete-dataschema.yaml +│ ├── meta-single-cube.yaml +│ ├── version-diff.yaml +│ └── version-rollback.yaml +└── checklists/ + └── requirements.md # Already created by /speckit.specify +``` + +### Source Code (repository root) + +Single backend service. Additions follow the existing `routes/` + `utils/` split and mirror the layout of `010-dynamic-models-ii`. + +```text +services/cubejs/src/ +├── routes/ +│ ├── validateInBranch.js # NEW — FR-001..FR-003 +│ ├── refreshCompiler.js # NEW — FR-004, FR-004a, FR-005 +│ ├── deleteDataschema.js # NEW — FR-006, FR-007, FR-008 +│ ├── metaSingleCube.js # NEW — FR-009, FR-010 +│ ├── versionDiff.js # NEW — FR-011, FR-012 +│ ├── versionRollback.js # NEW — FR-013, FR-013a, FR-014 +│ ├── index.js # MODIFIED — register 6 new routes +│ └── __tests__/ +│ ├── validateInBranch.test.js +│ ├── refreshCompiler.test.js +│ ├── deleteDataschema.test.js +│ ├── metaSingleCube.test.js +│ ├── versionDiff.test.js +│ └── versionRollback.test.js +└── utils/ + ├── compilerCacheInvalidator.js # NEW — invalidateCompilerForBranch() + ├── referenceScanner.js # NEW — scanCrossCubeReferences() (FR-008) + ├── dataSourceHelpers.js # MODIFIED — add findVersionDataschemas(), deleteDataschema(), rollbackVersion() + └── __tests__/ + ├── compilerCacheInvalidator.test.js + └── referenceScanner.test.js + +services/hasura/migrations/ +└── 1713600000000_dataschemas_delete_permission/ + ├── up.sql + └── 
down.sql + +tests/stepci/workflows/model-management/ +├── validate-in-branch.yml +├── refresh-compiler.yml +├── delete-dataschema.yml +├── meta-single-cube.yml +├── version-diff.yml +└── version-rollback.yml +``` + +**Structure Decision**: Additive layout mirroring `010-dynamic-models-ii`. All new server code lives inside `services/cubejs/src/`. One Hasura migration. StepCI workflows grouped under a new `tests/stepci/workflows/model-management/` folder. No client-v2 changes are in scope; the frontend continues to use the existing catalog/meta endpoints and is unaffected by the additions. + +## Complexity Tracking + +No violations. Table intentionally omitted. diff --git a/specs/011-model-mgmt-api/quickstart.md b/specs/011-model-mgmt-api/quickstart.md new file mode 100644 index 00000000..4b8cbbd7 --- /dev/null +++ b/specs/011-model-mgmt-api/quickstart.md @@ -0,0 +1,234 @@ +# Quickstart — Model Management API + +Date: 2026-04-20 +Branch: `011-model-mgmt-api` + +This quickstart walks through the full agent lifecycle: author → contextual-validate → persist → refresh → diff → rollback → delete. Every call uses a FraiOS JWT (HS256, `accountId` claim present) obtained from cxs2. + +## 0. Prerequisites + +- Docker stack running: `./cli.sh compose up` +- Agent holds a FraiOS token in `$TOKEN` (set below each snippet implicitly). +- Datasource id and branch id are discoverable via `GET /api/v1/meta-all`. + +```bash +# Catalog discovery (no datasource header needed) +curl -sS -H "Authorization: Bearer $TOKEN" http://localhost:4000/api/v1/meta-all \ + | jq '.datasources[] | {datasource_id, datasource_name, branch_id, version_id, + cubes: [.cubes[] | {name, dataschema_id, file_name}]}' +``` + +Every cube summary now carries **`dataschema_id`** and **`file_name`** alongside `name`. Use them to resolve cube name → dataschema id in a single round-trip when calling `validate-in-branch` (mode=replace/preview-delete) or `DELETE /api/v1/dataschema/:id`. 
+ +Set for the rest of this guide: + +```bash +DS=$(curl -sS -H "Authorization: Bearer $TOKEN" http://localhost:4000/api/v1/meta-all \ + | jq -r '.datasources[0].datasource_id') +BR=$(curl -sS -H "Authorization: Bearer $TOKEN" http://localhost:4000/api/v1/meta-all \ + | jq -r '.datasources[0].branch_id') + +# Resolve a dataschema id by cube name for the first datasource: +ORDERS_DSID=$(curl -sS -H "Authorization: Bearer $TOKEN" http://localhost:4000/api/v1/meta-all \ + | jq -r '.datasources[0].cubes[] | select(.name == "orders") | .dataschema_id') +``` + +> **Listing branch versions** (for diff / rollback): query Hasura directly via the proxied GraphQL endpoint: +> ```graphql +> query($b: uuid!) { +> versions(where: {branch_id: {_eq: $b}}, order_by: {created_at: desc}) { +> id created_at origin is_current +> } +> } +> ``` +> The FraiOS token is minted-to-Hasura server-side by `/v1/graphql`, so Tychi forwards the same token it uses for `/api/v1/*`. + +## 1. Contextual validation of a draft (US1, FR-001..FR-003) + +Validate a new cube file against the branch's deployed cubes: + +```bash +curl -sS -X POST http://localhost:4000/api/v1/validate-in-branch \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "branchId": "'"$BR"'", + "mode": "append", + "draft": { + "fileName": "orders.yml", + "content": "<escaped YAML for the new cube>" + } + }' +``` + +Simulate a deletion and see the blocking references: + +```bash +curl -sS -X POST http://localhost:4000/api/v1/validate-in-branch \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "branchId": "'"$BR"'", + "mode": "preview-delete", + "targetDataschemaId": "'"$ORDERS_DSID"'" + }' | jq '.blockingReferences' +``` + +## 2. Persist the new draft (existing Hasura GraphQL proxy) + +Write the draft into a new version on the branch (the skill's existing Hasura mutation path).
This uses the `/v1/graphql` proxy, which accepts the FraiOS token unchanged: + +```bash +curl -sS -X POST http://localhost:4000/v1/graphql \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "query": "mutation($o: versions_insert_input!) { insert_versions_one(object: $o) { id } }", + "variables": { + "o": { + "branch_id": "'"$BR"'", + "dataschemas": { "data": [{ "name": "orders.yml", "code": "<escaped YAML cube definition>", "datasource_id": "'"$DS"'" }] } + } + } + }' | jq +``` + +## 3. Force compiler refresh after an in-place edit (US2, FR-004..FR-005) + +If you used `update_dataschemas_by_pk` to edit `code` in place (rather than inserting a new version), the compiler cache still serves the old model until you refresh: + +```bash +curl -sS -X POST http://localhost:4000/api/v1/internal/refresh-compiler \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"branchId": "'"$BR"'"}' | jq +# -> { "evicted": 3, "schemaVersion": "ab12…" } +``` + +Subsequent metadata/load requests for the branch will recompile on first hit and surface any compile errors on *that* response (asynchronous model — FR-004a). + +> **Authorisation**: refresh requires **owner or admin** role on the datasource's team (FR-015, research.md §R14). A member-only caller receives `403 {"code":"refresh_unauthorized"}`. Refresh is gated at the same bar as delete and rollback because it affects the compiled view that every user of the branch sees. +> +> **Idempotence**: idempotence is per `(branchId, schemaVersion)` pair, not wall-clock. A second call with no intervening edit returns `evicted: 0`. If the dataschemas change between calls, the new `schemaVersion` makes the second call a different logical operation and it evicts the new hash's entries. + +## 4.
Single-cube metadata (US4, FR-009..FR-010) + +Fetch compiled metadata for one cube without pulling the whole catalog: + +```bash +curl -sS http://localhost:4000/api/v1/meta/cube/orders \ + -H "Authorization: Bearer $TOKEN" \ + -H "x-hasura-datasource-id: $DS" \ + -H "x-hasura-branch-id: $BR" | jq +``` + +> The path is `/api/v1/meta/cube/{cubeName}` — a dedicated segment (`/cube/`) prevents collision with Cube.js's built-in `GET /api/v1/meta` aggregate endpoint. +> +> **Historical meta**: this endpoint always compiles the **latest version** of the requested branch. If you need a cube's compiled metadata as it looked on a historical version (for audit or diff-preview purposes), fall back to Cube.js's built-in `GET /api/v1/meta` with `x-hasura-branch-id` + `x-hasura-branch-version-id` headers, then filter client-side by cube name. That endpoint is unchanged by this feature. + +Cube not found → 404 with `code: "cube_not_found"`. + +## 5. Diff between versions (US5, FR-011..FR-012) + +```bash +# fromVersionId = version before the change, toVersionId = current active version +curl -sS -X POST http://localhost:4000/api/v1/version/diff \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "fromVersionId": "<previous-version-id>", + "toVersionId": "<current-version-id>" + }' | jq +``` + +Response lists `addedCubes`, `removedCubes`, and `modifiedCubes` with per-field deltas. + +## 6. Rollback (US5, FR-013..FR-014) + +Regret the last change? Roll back: + +```bash +curl -sS -X POST http://localhost:4000/api/v1/version/rollback \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "branchId": "'"$BR"'", + "toVersionId": "<version-id-to-restore>" + }' | jq +# -> { "newVersionId": "…", "clonedDataschemaCount": 7 } +``` + +The branch's active version is now the freshly inserted clone. Explorations and alerts bound to older versions are untouched (FR-013a). + +## 7.
Delete a cube (US3, FR-006..FR-008) + +Preview first to see any blocking references: + +```bash +curl -sS -X POST http://localhost:4000/api/v1/validate-in-branch \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"branchId":"'"$BR"'","mode":"preview-delete","targetDataschemaId":"'"$ORDERS_DSID"'"}' | jq +``` + +If `blockingReferences` is empty, delete: + +```bash +curl -sS -X DELETE http://localhost:4000/api/v1/dataschema/$ORDERS_DSID \ + -H "Authorization: Bearer $TOKEN" | jq +# -> { "deleted": true, "dataschemaId": "<dataschema-id>" } +``` + +On 409 with `code: "delete_blocked_by_references"`, the `blockingReferences` list tells you which other cubes need updating first. + +## 8. End-to-end acceptance (SC-001) + +A single agent session can run steps 1 → 7 in under five minutes on a branch with ten cubes. StepCI workflow `tests/stepci/workflows/model-management/end-to-end.yml` exercises this path in CI. + +## Error codes reference + +Authoritative enumeration — matches the `ErrorCode` schema in every contract and the `errorCodes.js` export (FR-017). + +| Code | Endpoint | HTTP | Meaning | +|---|---|---|---| +| `validate_invalid_mode` | validate-in-branch | 400 | Mode/field combination invalid. | +| `validate_target_not_found` | validate-in-branch | 404 | `targetDataschemaId` does not belong to branch. | +| `validate_unresolved_reference` | validate-in-branch | 200 (in CompileReport.errors[].code) | Draft references a cube/field not present in the branch. | +| `refresh_branch_not_visible` | refresh-compiler | 404 | Caller cannot see the branch. | +| `refresh_unauthorized` | refresh-compiler | 403 | Caller lacks owner/admin role on the datasource team. | +| `delete_blocked_by_references` | dataschema DELETE | 409 | FR-008 blocking refs. Response carries `blockingReferences[]`. | +| `delete_blocked_historical_version` | dataschema DELETE | 409 | Target is **not on the latest version of the active branch** — including older versions of the active branch.
| +| `delete_blocked_authorization` | dataschema DELETE | 403 | Caller lacks owner/admin role on the datasource team. | +| `cube_not_found` | meta-single | 404 | Cube not present on the branch's latest version. | +| `diff_cross_branch` | version-diff | 400 | Versions belong to different branches. | +| `diff_invalid_request` | version-diff | 400 | Malformed request body. | +| `rollback_version_not_on_branch` | version-rollback | 400 | `toVersionId` not attached to `branchId`. | +| `rollback_invalid_request` | version-rollback | 400 | Malformed request body. | +| `rollback_source_columns_missing` | version-rollback | 400 | Target version references source columns that no longer exist. | + +Every non-2xx response body carries `{code: ErrorCode, message: string}` for programmatic handling. The `ErrorCode` component is declared identically in every contract file; CI task `lint:error-codes` (T013f/T051) prevents drift. diff --git a/specs/011-model-mgmt-api/research.md b/specs/011-model-mgmt-api/research.md new file mode 100644 index 00000000..d6c6befd --- /dev/null +++ b/specs/011-model-mgmt-api/research.md @@ -0,0 +1,577 @@ +# Phase 0 Research — Model Management API + +Date: 2026-04-20 +Branch: `011-model-mgmt-api` + +All Technical Context fields are fully specified. There are **no `NEEDS CLARIFICATION` markers** — the clarification session already resolved the ambiguities. The items below cover technical unknowns surfaced while tracing each requirement through existing code. + +--- + +## R1 — Compiler cache identity and invalidation primitive + +**Decision**: Invalidate by deleting LRU entries whose `appId` ends with the branch's current `schemaVersion` hash. 
+ +**Rationale**: `services/cubejs/index.js:45-46` defines: + +```js +const contextToAppId = ({ securityContext }) => + `CUBEJS_APP_${securityContext?.userScope?.dataSource?.dataSourceVersion}_${securityContext?.userScope?.dataSource?.schemaVersion}}`; +``` + +`schemaVersion` is computed in `buildSecurityContext.js:53` as `md5(dataschemaIds)`. Two users querying the same branch share the same `schemaVersion` but may differ in `dataSourceVersion` (which includes team-properties hash). So: + +- The same branch produces **one `schemaVersion`** across callers. +- Multiple cached `CompilerApi` instances may exist for one branch (one per team-properties hash). +- A content-only edit of a dataschema keeps the `schemaVersion` identical → stale cache. + +`@cubejs-backend/server-core` stores `compilerCache` as a plain `LRUCache` (`server.js:107`) exposed at `cubejs.compilerCache`. `generateDataSchema.js:113` and `smartGenerate.js:822` already call `cubejs.compilerCache.purgeStale()` after writes; that only evicts TTL-expired entries, so it is insufficient for our refresh semantics. + +The LRU instance exposes `.keys()`, `.delete(key)`, and `.clear()`. Deleting specific keys is O(1). Our helper iterates keys once and deletes only the matching subset. + +**Implementation sketch** (`utils/compilerCacheInvalidator.js`): + +```js +export function invalidateCompilerForBranch(cubejs, schemaVersion) { + const cache = cubejs.compilerCache; + if (!cache || typeof cache.keys !== "function") return 0; + const suffix = `_${schemaVersion}}`; + let evicted = 0; + for (const key of cache.keys()) { + if (typeof key === "string" && key.endsWith(suffix)) { + cache.delete(key); + evicted += 1; + } + } + return evicted; +} +``` + +**Alternatives considered**: + +- `cubejs.compilerCache.clear()` — evicts every tenant's cache. Violates Q1 (blast radius limited to target branch). +- Bump `dataSourceVersion` by mutating team settings — mutates unrelated state; violates Multi-Tenancy First. 
+- Insert a new version (the existing workaround) — not an invalidation; mutates history; explicitly rejected by US2's purpose. + +--- + +## R2 — Compiler invocation for contextual validation + +**Decision**: Reuse `@cubejs-backend/schema-compiler` `prepareCompiler` with an `InMemorySchemaFileRepository`, seeded with the branch's current dataschemas merged with the draft according to the selected mode (`append` / `replace` / `preview-delete`). + +**Rationale**: `routes/validate.js` already uses this exact primitive for stateless file-level validation (`validate.js:7-19, 69-74`). Contextual validation simply seeds the same repository with more files. + +**Mode semantics**: + +- `append` — add the draft file to the repository alongside every existing dataschema. Collision on `fileName` is a validation error. +- `replace` — require `targetDataschemaId` in the request; swap that dataschema's `code` for the draft's content and keep all other dataschemas. +- `preview-delete` — require `targetDataschemaId`; build the repository excluding that dataschema and run the compiler. If compilation fails, every error is a blocking reference; report them so the caller can proceed to the delete endpoint informed. + +The compiler's `errorsReport` structure (`validate.js:86-111`) already surfaces errors and warnings with file, line, column, and plain message. We reuse the mapping helpers (`mapCompilerError`, `mapSyntaxWarning`). + +**Alternatives considered**: + +- Custom YAML parser that statically resolves cross-cube references — reimplements the compiler; high false-positive risk; violates SC-003 (false-negative rate must be zero against the real compiler). +- Shelling out to `cubejs.apiGateway().getCompilerApi(context)` — requires a datasource-scoped security context, which forces the caller to pass `x-hasura-datasource-id`. That works but couples validation to a specific user's scope. `prepareCompiler` alone is scope-agnostic and simpler. 
+ +--- + +## R3 — Cross-cube reference detection for FR-008 + +**Decision**: A hybrid approach — parse each other cube into its AST/YAML structure once per delete request and scan for seven reference kinds by textual pattern on the cube's source, then fall back to a compiler probe (same primitive as R2's `preview-delete` mode) as an additional safety net. + +**Rationale**: The seven reference kinds enumerated in FR-008 are: + +| # | Kind | Detection | +|---|---|---| +| a | `joins` entries | YAML `joins[*].sql` or `joins[*].name` referencing `TARGET.`/`${TARGET}` | +| b | `extends` chains | YAML `extends: TARGET` or JS `cube('X', { extends: TARGET, ... })` | +| c | `sub_query` measures/dimensions | YAML `sub_query: true` with a formula referencing `TARGET.` | +| d | Measure/dimension formula references | `sql` bodies containing `TARGET.` or `${TARGET}.` | +| e | Segment inheritance | YAML `segments[*].sql` referencing `TARGET.` | +| f | Pre-aggregation rollup references | YAML `pre_aggregations[*].measureReferences[*]` / `dimensionReferences[*]` / `timeDimensionReference` / `rollups[*]` containing `TARGET.` | +| g | `FILTER_PARAMS..*` | Regex `FILTER_PARAMS\.TARGET\.` in any `sql` body | + +Textual scan is fast and deterministic; compiler probe catches anything the enumeration misses (the spec's SC-003 target). Running both is cheap: we compile the branch anyway inside `preview-delete`, and the textual scan lets us produce a richer error response (`referring_cube`, `file`, `reference_kind`, `line`) that the compiler's raw error report does not directly emit. 
+ +**Implementation sketch** (`utils/referenceScanner.js`): + +```js +const REFERENCE_PATTERNS = [ + { kind: "filter_params", re: (t) => new RegExp(`FILTER_PARAMS\\.${t}\\.`) }, + { kind: "cube_reference", re: (t) => new RegExp(`(?:\\$\\{${t}\\}|\\b${t})\\.[a-zA-Z_][a-zA-Z0-9_]*`) }, + { kind: "extends", re: (t) => new RegExp(`extends:\\s*["']?${t}["']?`) }, + // … +]; + +export function scanCrossCubeReferences(targetCubeName, otherCubes) { + const hits = []; + for (const cube of otherCubes) { + for (const { kind, re } of REFERENCE_PATTERNS) { + const pattern = re(targetCubeName); + const match = pattern.exec(cube.code); + if (match) { + hits.push({ + referring_cube: cube.cubeName, + file: cube.fileName, + reference_kind: kind, + line: lineOf(cube.code, match.index), + }); + } + } + } + return hits; +} +``` + +**Alternatives considered**: + +- Pure compiler probe (no textual scan) — works but produces opaque error strings like "Cube A references dimension B" without a structured `reference_kind`. Tychi needs structured errors per FR-017. +- Full AST parse per cube — overkill; YAML structure is already accessible via `YAML.parse` from `routes/discover.js:22`; a textual pattern on the `sql` strings inside the parsed cube is sufficient and far simpler. + +--- + +## R4 — Hasura permission model for deletion (version-level immutability) + +**Decision**: Add a trigger-maintained boolean column `versions.is_current` (defaulting to `true` on insert, with the previous `is_current=true` row on the same branch flipped to `false` atomically). Delete permission on `dataschemas` then requires **owner/admin role + active branch + `version.is_current = true`**. The handler (T028) additionally re-checks the invariant in application code before firing the mutation, so the check lives at two layers. + +**Rationale**: Versions are immutable snapshots within a branch. 
The runtime compiles only the newest version (`metaAll.js:50` selects `activeBranch.versions?.[0]` under `order_by: {created_at: desc}` from `dataSourceHelpers.js:110-117`). "Historical" therefore means **not-the-latest-version-of-the-active-branch**, including older versions of the currently-active branch — not merely "on a non-active branch". A permission filter keyed only on `branch.status = active` would still allow deleting a dataschema attached to v5 when v7 is the compiled version. + +Hasura row-level permissions cannot express "the row belongs to the maximum-created_at version of its branch" natively without a computed field or a derived column. A trigger-maintained column is the simplest durable primitive: + +```sql +ALTER TABLE versions ADD COLUMN is_current boolean NOT NULL DEFAULT true; + +-- Backfill: for each branch, the newest version keeps is_current=true; older rows flip to false. +UPDATE versions v SET is_current = false +WHERE v.id NOT IN ( + SELECT DISTINCT ON (branch_id) id FROM versions ORDER BY branch_id, created_at DESC +); + +CREATE OR REPLACE FUNCTION versions_flip_is_current() +RETURNS TRIGGER AS $$ +BEGIN + UPDATE versions SET is_current = false + WHERE branch_id = NEW.branch_id AND id <> NEW.id AND is_current = true; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER versions_flip_is_current_trg +AFTER INSERT ON versions +FOR EACH ROW EXECUTE FUNCTION versions_flip_is_current(); +``` + +Resulting `delete_permissions` on `dataschemas`: + +```yaml +delete_permissions: + - role: user + permission: + filter: + _and: + - datasource: + team: + members: + _and: + - member_roles: { team_role: { _in: [owner, admin] } } + - user_id: { _eq: X-Hasura-User-Id } + - version: + is_current: { _eq: true } + branch: + status: { _eq: active } +``` + +Handler (T028) ALSO queries the target row's `version.is_current` and `version.branch.status` and returns `delete_blocked_historical_version` early if either fails, so agents get a structured error 
rather than the opaque Hasura permission rejection. + +**Migration**: the schema change, backfill, and trigger all land in `services/hasura/migrations/1713600000000_dataschemas_delete_permission/up.sql`. Down migration drops the trigger, the function, and the column. + +**Alternatives considered**: + +- Hasura **computed field** returning the latest-version-id per branch — works, but adds a SELECT subquery to every permission check. A trigger-maintained column is O(1) to read. +- Handler-only enforcement with no DB-level constraint — fragile; any future caller that reaches Hasura directly (admin-secret RPC, manual SQL) could bypass the invariant. +- Soft-delete column on `dataschemas` — keeps history; introduces a new filter condition everywhere. Violates YAGNI and doesn't match the user's "truly remove this cube" expectation. + +--- + +## R5 — Rollback as new-version insertion + +**Decision**: A rollback request loads the target version's dataschemas (`findDataSchemasByIds`), strips their identifiers, inserts a new `versions` row with the cloned dataschemas (`createDataSchema`), and returns the new version identifier. No existing record is mutated. + +**Rationale**: `createDataSchema` already takes `{ branch_id, dataschemas: { data: [...] } }` and inserts an `insert_versions_one` — matches the snapshot semantic. FR-013a requires dataschema-only cloning; we do **not** touch `explorations`, `alerts`, or any other table. + +Activation of the new version is automatic: `findUser` returns the latest version per branch (`dataSourceHelpers.js:110-117`, `versions(limit: 1, order_by: {created_at: desc})`), so inserting the new row makes it the effective active version without a status update. + +**Idempotency**: Rollback is naturally non-idempotent at the HTTP layer (each call inserts a new version). The idempotency concern in FR-005 applies only to refresh; rollback carries no such constraint. 
+ +**Alternatives considered**: + +- Update the branch's active version pointer to the target — breaks the "versions always ordered by `created_at`" invariant used by `findUser`. +- Hard-copy via SQL — bypasses Hasura permissions; violates Security by Default. + +--- + +## R6 — Single-cube metadata path + +**Decision**: Extract a new `compileMetaForBranch` helper that returns the **raw** `metaConfig` output (full cube envelopes, not summarized), with configurable branch/version selection. Use it from both `/api/v1/meta-all` (which still summarizes post-call) and the new `/api/v1/meta/cube/:cubeName` (which filters by name and returns the unmodified envelope). + +**Rationale**: The existing `metaForDatasource` helper in `metaAll.js` is unfit for direct reuse for two reasons: + +1. **It summarizes.** `summarizeCube` (`metaAll.js:24-42`) flattens each cube to `{measures: string[], dimensions: string[], segments: string[]}` — just names. The single-cube contract needs the full compiled member objects (`{name, title, type, sql, format, meta, ...}`) so the agent can inspect members without a second round-trip. +2. **It always picks the active branch.** `metaForDatasource` (`metaAll.js:44-50`) does `ds.branches?.find((b) => b.status === "active")`. The single-cube contract accepts an optional `x-hasura-branch-id` header; honouring it is the whole point of the endpoint. + +**New helper contract** (`services/cubejs/src/utils/metaForBranch.js`): + +```js +export async function compileMetaForBranch({ apiGateway, req, userId, authToken, dataSource, branchId, versionId, allMembers }) { + // Resolve the requested branch (or active branch if branchId omitted). + // Resolve the requested version (or latest if versionId omitted). + // Build securityContext via defineUserScope, just like metaAll. + // Return { branchId, versionId, metaConfig } — raw, visibility-filtered, not summarized. 
+} +``` + +- `metaAll.js` continues to call `compileMetaForBranch` for every datasource and then applies its own `summarizeCube` post-processing. +- `metaSingleCube.js` calls `compileMetaForBranch` once, filters by `name === req.params.cubeName`, and returns the full envelope (`SingleCubeMeta` per data-model §2.8). 404 with `cube_not_found` if absent. + +**Alternatives considered**: + +- Reuse `metaForDatasource` and teach the single-cube route to unsummarize — impossible, the summarized output discards the data we need. +- Accept summarized output as sufficient for the single-cube contract — violates the stated utility ("inspect members without a second round-trip"); reduces the endpoint to a name-lookup that the caller could already do from `/meta-all`. + +--- + +## R7 — Version diff uses a new `diffVersions` adapter over `diffModels` + +**Decision**: Write a new helper `services/cubejs/src/utils/versionDiff.js` that produces the `{addedCubes, removedCubes, modifiedCubes}` shape the contract demands. Internally it iterates matched-by-name cube pairs across the two versions and delegates the **per-cube** field-level diff to the existing `diffModels.js` helper. `diffModels` alone does not match the contract. + +**Rationale**: Inspection of `services/cubejs/src/utils/smart-generation/diffModels.js:399` confirms the function: + +- Takes **two model documents** (existing + new) and a `mergeStrategy` — it is a *merge-preview* helper, not a *version-diff* helper. +- Returns `{fields_added, fields_updated, fields_removed, fields_preserved, blocks_preserved, ai_metrics_*, summary}` — a **flat field inventory across all cubes**, not the per-cube structure `addedCubes / removedCubes / modifiedCubes` required by FR-011 and contracts/version-diff.yaml. +- Has bespoke merge semantics (auto/merge/replace, AI-field preservation, user-content preservation) that are irrelevant to a version-to-version diff. 
+ +Adapter algorithm (in `versionDiff.js`): + +```js +export async function diffVersions({ fromDataschemas, toDataschemas }) { + const fromByFile = indexBy(fromDataschemas, "name"); + const toByFile = indexBy(toDataschemas, "name"); + + const addedCubes = [], removedCubes = [], modifiedCubes = []; + + for (const [file, toRow] of toByFile) { + if (!fromByFile.has(file)) { + for (const cube of parseCubes(toRow.code)) addedCubes.push({ cubeName: cube.name, file }); + continue; + } + const fromRow = fromByFile.get(file); + if (fromRow.checksum === toRow.checksum) continue; // byte-identical, skip + + // Per-file cube-level diff. diffModels gives field-level; we collapse per cube. + const perCube = diffModelsToPerCube(fromRow.code, toRow.code); + for (const cube of perCube) { + if (cube.status === "modified") modifiedCubes.push(cube); + } + } + for (const [file, fromRow] of fromByFile) { + if (!toByFile.has(file)) { + for (const cube of parseCubes(fromRow.code)) removedCubes.push({ cubeName: cube.name, file }); + } + } + return { addedCubes, removedCubes, modifiedCubes }; +} +``` + +`diffModelsToPerCube` runs `diffModels(fromRow.code, toRow.code, "replace")` once per file pair and re-groups its flat `fields_added` / `fields_updated` / `fields_removed` arrays back into per-cube `CubeFieldChange` records (grouping by the `cube` attribute that every entry already carries — see `diffModels.js:118`). + +Both versions must belong to the same branch (FR-012). Before running the adapter, a single GraphQL query resolves `branch_id` for both version ids; mismatch → 400 with `diff_cross_branch`. + +**Alternatives considered**: + +- Use `diffModels` directly and reshape the flat output in the handler — same adapter work, but hides the logic inside the route file where it is untestable in isolation. +- Write a bespoke version differ that ignores `diffModels` — duplicates the YAML/JS parsing logic already in `diffModels.js` and `parseCubesFromJs`. 
+- Change the contract to expose the flat `fields_added / fields_updated / fields_removed` shape directly — rejected by FR-011 ("identifying added, removed, and modified cubes"). + +--- + +## R8 — Audit record emission + +**Decision**: Rely on Hasura's existing event triggers. Add trigger definitions for `dataschemas.delete` and `versions.insert (rollback origin)` mirroring the current `generate_dataschemas_docs` trigger on `versions`. + +**Rationale**: Constitution §IV (Security by Default) requires audit for mutating operations, and `tables.yaml` already uses event triggers for generation, cron task creation, and doc generation. Our additions: + +- `dataschemas.delete` trigger → `POST {ACTIONS_URL}/rpc/audit_dataschema_delete` (a trivial RPC handler in `services/actions/src/rpc/auditDataschemaDelete.js` that logs the event with `session_variables`, `data.old`, and a timestamp). +- `versions.insert` existing trigger (`generate_dataschemas_docs`) already fires on insert; we extend the Actions handler to distinguish rollback-origin inserts by checking for a new column `versions.origin` (values: `user`, `smart_gen`, `rollback`). Migration adds the column with default `user`. + +The **refresh** endpoint is not persisted in Hasura (cache invalidation is in-memory), so its audit record is emitted directly from the handler via a `fetchGraphQL` insert into an existing `audit_logs` collection (already present if any) or `console.log` structured line (if no audit table exists). Phase 1 design confirms which path applies. + +**Alternatives considered**: + +- New `audit_logs` table — introduces a new table; violates YAGNI given Hasura event triggers already capture the two DB-backed mutations. +- Log-only via stdout — acceptable for refresh (no DB row changes), not acceptable for delete/rollback per constitution. 
+ +--- + +## R9 — StepCI test corpus for SC-003 (zero false-negative rate) + +**Decision**: Build a fixture directory at `tests/stepci/workflows/model-management/fixtures/` with six seed scenarios: + +1. **valid-append** — new cube that compiles cleanly alongside a simple two-cube branch. +2. **dangling-join** — draft references a cube that does not exist. +3. **circular-extends** — `extends: A` while A already extends the draft. +4. **measure-to-measure-typo** — `{CUBE}.total` where the target measure is `totals`. +5. **preagg-reference-break** — existing pre-aggregation references `TARGET.metric`; submitting `preview-delete` for TARGET should surface this. +6. **filter-params-orphan** — `FILTER_PARAMS.TARGET.dim` in another cube; deletion of TARGET must be blocked. + +Each fixture ships as a pair of files (branch-seed + draft) plus the expected error-code and `reference_kind`. The workflow loads the branch through Hasura, calls the endpoint, and asserts the response shape. + +**Rationale**: SC-003 demands zero false-negatives against a curated corpus; this corpus is the corpus. It also exercises every reference kind enumerated in FR-008. + +--- + +## R10 — Auth routing: branch-scoped vs datasource-scoped + +**Decision**: Route the branch-scoped endpoints through a **direct-verify** handler-local auth path (mirroring `metaAll.js:127-141` / `discover.js:114-125`). Keep `checkAuthMiddleware` only for endpoints that genuinely need a datasource header. 
+ +**Rationale**: `services/cubejs/src/utils/checkAuth.js:72-79` hard-throws 400 when `x-hasura-datasource-id` is absent: + +```js +if (!dataSourceId) { + const error = new Error("400: No x-hasura-datasource-id provided, …"); + error.status = 400; throw error; +} +``` + +None of the branch-scoped endpoints require a datasource header on their request contract: + +| Endpoint | Contract header | Auth mechanism | +|---|---|---| +| `POST /api/v1/validate-in-branch` | branchId in body | **direct-verify** (no middleware) | +| `POST /api/v1/internal/refresh-compiler` | branchId in body | **direct-verify** | +| `DELETE /api/v1/dataschema/:id` | dataschemaId in path | **direct-verify** + server-side lookup of datasource/branch from the dataschema | +| `GET /api/v1/meta/cube/:cubeName` | datasourceId + optional branchId headers | `checkAuthMiddleware` **(keeps working because the contract mandates the datasource header)** | +| `POST /api/v1/version/diff` | version ids in body | **direct-verify** + server-side resolution of branch | +| `POST /api/v1/version/rollback` | branchId + versionId in body | **direct-verify** + server-side resolution of datasource | + +The direct-verify path each handler runs: + +```js +const token = req.headers.authorization?.replace(/^Bearer /, ""); +const tokenType = detectTokenType(token); +let payload, userId; +if (tokenType === "workos") { payload = await verifyWorkOSToken(token); userId = await provisionUserFromWorkOS(payload); } +else if (tokenType === "fraios") { payload = await verifyFraiOSToken(token); userId = await provisionUserFromFraiOS(payload); } +else { /* HS256 Hasura fallback for legacy frontend calls */ } +const user = await findUser({ userId }); +// resolve datasource/branch from the request-specific key (branchId, dataschemaId, versionId) +// enforce partition via resolvePartitionTeamIds(user.members, payload.partition) +``` + +This is the same pattern `metaAll.js` and `discover.js` already use. No new middleware. 
+ +**Alternatives considered**: + +- Relax `checkAuthMiddleware` to make the datasource header optional — unacceptable: every existing datasource-scoped route relies on the 400 as a precondition for `defineUserScope`, and loosening it would compromise Multi-Tenancy First (Constitution §II). +- Require agents to pass `x-hasura-datasource-id` on every branch-scoped route — pushes a lookup onto the client that the server already has to do itself (dataschema→version→branch→datasource). Also fails for `validate-in-branch` (datasource context is irrelevant to pure compile). + +--- + +## R11 — `fetchGraphQL` error collapsing and granular status codes + +**Decision**: Add an opt-in `{ preserveErrors: true }` mode to `services/cubejs/src/utils/graphql.js` `fetchGraphQL`. In that mode the helper returns `{ data, errors, status }` without throwing, exposing Hasura's original error codes and HTTP status so routes that need FR-017-compliant mapping can surface `delete_blocked_authorization`, `cube_not_found`, etc. + +**Rationale**: The current helper (`graphql.js:27-31`) collapses every GraphQL-level `errors[]` into a generic 503: + +```js +if (res.errors) { + const error = new Error(JSON.stringify(res.errors)); + error.status = 503; + throw error; +} +``` + +`503` conflates three distinct failures — Hasura is down, the caller lacks permission, the row doesn't exist — and FR-017 mandates stable codes for each. Under the existing helper, `deleteDataschema` cannot distinguish "caller unauthorized" (should emit `delete_blocked_authorization`) from "Hasura unreachable" (should propagate as 503). + +Extension sketch: + +```js +export const fetchGraphQL = async (query, variables, authToken, { preserveErrors = false } = {}) => { + // … existing logic … + if (res.errors) { + if (preserveErrors) return { data: res.data ?? 
null, errors: res.errors, status: result.status }; + const error = new Error(JSON.stringify(res.errors)); + error.status = 503; + throw error; + } + return res; +}; +``` + +Callers that need granular mapping opt in; every existing call site keeps the old throwing behaviour. Handlers using the new mode inspect `errors[0].extensions.code` — Hasura emits `permission-error`, `not-exists`, `constraint-violation`, etc., which map cleanly to our stable codes via a small `mapHasuraErrorCode()` lookup. + +**Alternatives considered**: + +- Bypass `fetchGraphQL` and call Hasura via `fetch` directly in every route that needs granular errors — duplicates the auth-header handling and `HASURA_ENDPOINT` resolution already centralised in the helper. +- Rewrite `fetchGraphQL` to always return structured errors — would break every existing caller that relies on the throw-on-error contract. Opt-in is the constitution-friendly change. + +--- + +## R12 — Durable audit store + +**Decision**: Add a new `audit_logs` table to the Hasura PostgreSQL schema. Delete and rollback record rows into it via Hasura event triggers that call existing Actions RPC handlers (one per operation). Ninety-day retention enforced by a scheduled cleanup job (new Hasura cron trigger in the same migration). 
+ +**Schema**: + +```sql +CREATE TABLE audit_logs ( + id uuid PRIMARY KEY DEFAULT gen_random_uuid(), + action text NOT NULL CHECK (action IN ('dataschema_delete', 'version_rollback')), + -- SET NULL rather than CASCADE: deleting a user must not erase their audit trail + -- (FR-016 requires records to survive for the full 90-day retention window). + user_id uuid REFERENCES users(id) ON DELETE SET NULL, + datasource_id uuid REFERENCES datasources(id) ON DELETE SET NULL, + branch_id uuid REFERENCES branches(id) ON DELETE SET NULL, + target_id uuid NOT NULL, + outcome text NOT NULL CHECK (outcome IN ('success', 'failure')), + error_code text, + payload jsonb, + created_at timestamptz NOT NULL DEFAULT now() +); + +CREATE INDEX audit_logs_created_at_idx ON audit_logs (created_at DESC); +CREATE INDEX audit_logs_user_id_idx ON audit_logs (user_id); +CREATE INDEX audit_logs_action_idx ON audit_logs (action); +``` + +**Two-path write model** (consequence of FR-016's success-and-failure requirement): + +1. **Success path — Hasura event trigger.** When a mutation commits (`dataschemas.delete` succeeds, or `versions.insert with origin='rollback'` succeeds), Hasura's existing event-trigger infrastructure fires a webhook to an Actions RPC handler that writes `outcome='success'` into `audit_logs`. This path survives handler crashes between the mutation and the HTTP response — Hasura guarantees the trigger fires as long as the row committed. +2. **Failure path — direct handler write.** Every rejection branch inside the handlers (authorization denied, blocked-by-references, blocked-by-historical-version, source-columns-missing, partition mismatch, Hasura permission-error) calls a new in-process helper `writeAuditLog({action, userId, branchId, datasourceId, targetId, outcome:'failure', errorCode, payload})`. The helper performs an admin-secret INSERT into `audit_logs`. 
Two-path coverage is required because Hasura event triggers only fire on **committed** row changes — a rejected delete never reaches the trigger, a blocked rollback never inserts a `versions` row, and every non-Hasura rejection (FR-008 blocked refs, partition gate, `rollback_source_columns_missing`) is pure handler logic. Without the failure-path write, SC-007's zero-dropped-records claim fails immediately on the first blocked request. + +**Hasura metadata**: + +- Add `audit_logs` to `tables.yaml` with **no** `user` role permissions (admin-only). Agents cannot read or write the audit log directly. +- Add event triggers `delete_dataschema_audit` on `dataschemas.delete` and `version_rollback_audit` on `versions.insert where origin = 'rollback'`. Both point at Actions RPC handlers that INSERT with `outcome='success'`. + +**Retention**: A cron trigger fires daily: `DELETE FROM audit_logs WHERE created_at < now() - interval '90 days'`. + +**Rationale**: + +- Gives FR-016/SC-007 a concrete, queryable sink — "zero dropped records over a week" is testable against row count. +- Reuses the existing Hasura event-trigger + Actions RPC pattern already used by `generate_dataschemas_docs` and `create_cron_task_by_alert`; no new transport. +- Admin-only permission prevents agents from tampering or mining the audit log. +- Two-path model survives both handler crashes (success still recorded via trigger) and Hasura unavailability (failure attempts still recorded via handler, with `outcome='failure'` and `error_code='hasura_unavailable'` when the handler's own admin-secret write fails its retry loop). + +**Alternatives considered**: + +- Success-only recording via triggers — does not satisfy FR-016's "each attempted operation" requirement. Zero authorisation-failure visibility. Rejected. +- Failure-only recording in handlers — loses the crash-survival property that event triggers give for commits. Rejected. 
+- Emit structured stdout lines and scrape with the existing log aggregator — non-durable, not queryable, no retention guarantees. +- Reuse an existing table — none exists with the needed shape. + +--- + +## R13 — Single `ErrorCode` enum across all contracts (FR-017 enforcement) + +**Decision**: Every contract file under `contracts/` declares an identical `ErrorCode` schema as an `enum` with the full 14-code list. Every `code` field in every response body `$ref`s `#/components/schemas/ErrorCode`. Every non-2xx response declares an explicit schema with `{code: ErrorCode, message: string}` — no free-form strings, no schemaless error responses. Cross-contract consistency is enforced by a build-time lint task (T050) that diffs the `ErrorCode.enum` list across the six contract files and fails CI on drift. + +**Rationale**: FR-017 requires "a single importable enumeration." Prior draft had three mismatches: + +1. `validate-in-branch.yaml` declared `code: {type: string}` with no `enum` — any typo passes OpenAPI validation. +2. Some non-2xx responses (e.g. delete-dataschema 403) carried a prose description but **no schema**, so the wire contract for that failure code is undocumented. +3. `delete_blocked_authorization` lived in the 409 `enum` even though its semantic (missing owner/admin role) is a 403 concern — it was both in the 403 prose and the 409 `enum`. Two codes for one condition. 
+ +New layout (per contract file): + +```yaml +components: + schemas: + ErrorCode: + type: string + enum: + - validate_invalid_mode + - validate_target_not_found + - validate_unresolved_reference + - refresh_branch_not_visible + - refresh_unauthorized + - delete_blocked_by_references + - delete_blocked_historical_version + - delete_blocked_authorization + - cube_not_found + - diff_cross_branch + - diff_invalid_request + - rollback_version_not_on_branch + - rollback_invalid_request + - rollback_source_columns_missing + ErrorResponse: + type: object + required: [code, message] + properties: + code: { $ref: '#/components/schemas/ErrorCode' } + message: { type: string } +``` + +Then every non-2xx response references `ErrorResponse`: + +```yaml +"403": + description: Authorization or authentication failure. + content: + application/json: + schema: { $ref: '#/components/schemas/ErrorResponse' } +``` + +**Rehoming of `delete_blocked_authorization`**: it moves to **403 only**. The 409 enum for `delete-dataschema` is narrowed to `[delete_blocked_by_references, delete_blocked_historical_version]`. The code is not dropped — it still appears in the shared `ErrorCode` enum and is returned on the 403 response body. + +**Alternatives considered**: + +- External `$ref` to a single `common-errors.yaml` — cleaner in theory, but OpenAPI tooling across the project (the skill's YAML loader included) varies in cross-file `$ref` support. Duplicating the enum per file with a CI lint is more portable. +- Derive the enum from `errorCodes.js` at build time via a codegen step — premature. The enum has 14 entries and changes infrequently. + +--- + +## R14 — Refresh authorization bar + +**Decision**: Refresh requires **owner or admin** role on the target datasource's team, the same bar as delete and rollback. Team membership alone is insufficient. + +**Rationale**: Refresh evicts the compiler-cache entries that **every other user** of the branch relies on. 
The next compile hits happen on the next query, and the cost is paid by whoever issues that query — including end-users querying dashboards who did not request the refresh. A team member with read-only access who calls refresh in a tight loop effectively DoSes the compile path for the whole team. + +Symmetry with other mutating operations: + +| Operation | Affects | Current bar | +|---|---|---| +| `delete-dataschema` | DB row (persistent) | owner/admin | +| `version-rollback` | DB rows (persistent) | owner/admin | +| `refresh-compiler` | **other users' compiled view** (transient but observable) | owner/admin (this decision) | +| `validate-in-branch` (append) | nothing | member | +| `validate-in-branch` (replace / preview-delete) | nothing persistent, but signals intent | owner/admin | +| `version-diff` | nothing | member | +| `meta/cube/:cubeName` | nothing | member | + +Enforced in the handler (T022) by checking that `user.members` contains an entry for the resolved datasource's team with `member_roles.team_role IN (owner, admin)` — the same helper used by delete and rollback handlers. + +**Alternatives considered**: + +- Keep the team-member bar — symmetry argument fails: refresh is the only "mutating" operation that would accept a lower bar. +- Rate-limit refresh instead of raising the bar — adds a rate-limiter dependency; owner/admin gate is a zero-dependency solution. 
+ +--- + +## Summary of resolutions + +| Area | Resolution | Spec anchor | +|---|---|---| +| R1 — cache invalidation | LRU-scoped delete by `schemaVersion` suffix | FR-004, Clarification Q1 | +| R2 — contextual validate | `prepareCompiler` + in-memory repo, three modes | FR-001–FR-003 | +| R3 — reference scan | Textual patterns + compiler probe hybrid | FR-008 | +| R4 — delete permission | Trigger-maintained `versions.is_current` + owner/admin filter; handler re-checks | FR-006, FR-007 | +| R5 — rollback | Clone dataschemas into new version; no cascade | FR-013, FR-013a | +| R6 — single-cube meta | New `compileMetaForBranch` helper; full envelope, honours branch header | FR-009, FR-010 | +| R7 — diff | New `diffVersions` adapter wrapping `diffModels` per file pair | FR-011, FR-012 | +| R8 — audit | Two-path (trigger for success, handler for failure) → `audit_logs` | FR-016 | +| R9 — test corpus | Six fixtures under `tests/stepci/workflows/model-management/fixtures/` | SC-003 | +| R10 — auth routing | Direct-verify for branch-scoped endpoints; middleware only for datasource-scoped | FR-015 | +| R11 — `fetchGraphQL` mode | Opt-in `preserveErrors` for granular FR-017 status mapping | FR-017 | +| R12 — `audit_logs` table | New admin-only table with 90-day retention via cron; success + failure writers | FR-016, SC-007 | +| R13 — shared `ErrorCode` enum | All contracts reference one enum; all non-2xx responses have schemas; `delete_blocked_authorization` rehomed to 403 | FR-017 | +| R14 — refresh auth bar | Owner/admin (symmetric with delete + rollback) | FR-015 | + +All open technical unknowns resolved. Phase 1 design may proceed. 
diff --git a/specs/011-model-mgmt-api/spec.md b/specs/011-model-mgmt-api/spec.md new file mode 100644 index 00000000..d06d25fb --- /dev/null +++ b/specs/011-model-mgmt-api/spec.md @@ -0,0 +1,188 @@ +# Feature Specification: Model Management API + +**Feature Branch**: `011-model-mgmt-api` +**Created**: 2026-04-20 +**Status**: Draft +**Input**: User description: "Model Management API" + +## Context + +Synmetrix exposes a semantic-layer platform whose core asset is a **cube model** — a YAML or JS file describing dimensions, measures, segments, and pre-aggregations over a source table. Models live inside *dataschemas*, which are grouped into immutable *versions*, which belong to *branches* attached to a *datasource*. + +External agents (notably Tychi, an AI modelling assistant) already hold FraiOS-minted JWTs and can discover catalog state through the aggregated metadata endpoint, read/write dataschemas through the GraphQL proxy, and run profiling/smart-generation through existing endpoints. A gap analysis identified four missing capabilities that block an agent from owning the full author-to-publish lifecycle without human intervention: + +1. No way to validate a draft model **in the context of** sibling cubes already deployed on a branch. +2. No way to invalidate the compiled-model cache after an in-place edit — stale models keep serving until a new version is inserted. +3. No way to remove a cube; only additive writes are supported today. +4. No first-class rollback, diff, or single-cube metadata query — agents must hand-assemble these from lower-level primitives. + +The Model Management API closes those gaps so an authenticated agent can author, validate, persist, publish, compare, revert, and retire semantic-layer models without operator assistance. + +## Clarifications + +### Session 2026-04-20 + +- Q: Refresh cache blast radius → A: Compiler cache only, scoped to the target branch's dataschemas. Pre-aggregation cache and user-scope caches are untouched. 
+- Q: Refresh execution model (sync vs async) → A: Asynchronous invalidation. The endpoint evicts cache entries and returns immediately; the next metadata or query request triggers recompilation and surfaces any compile errors on that downstream request. +- Q: Rollback blast radius → A: Dataschemas only. The new version contains cloned dataschemas; explorations, alerts, and other version-bound entities are untouched and keep pointing at their original version. +- Q: Cross-cube reference types that block deletion → A: All compiler-resolved references by cube-qualified name — joins, extends, sub_query references, measure/dimension formula references, segment inheritance, pre-aggregation rollup references, and `FILTER_PARAMS.<cubeName>.*` self-references. +- Q: Draft mode vocabulary for contextual validation → A: Keep the proposed vocabulary — append, replace, preview-delete. It describes what change the draft represents; the smart-generation mergeStrategy (merge/replace/auto) describes how to combine outputs and is a separate concern. + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Contextual Validation Before Publish (Priority: P1) + +A modelling agent has drafted a new or updated cube file. Before persisting it, the agent needs to know whether the draft compiles cleanly **alongside** the cubes already published on the target branch, because most compile errors arise from cross-cube references (shared joins, segment inheritance, measures that reference measures on other cubes) that a file-local syntax check cannot detect. + +**Why this priority**: This is the single largest blocker to autonomous model edits. Without it every agent-authored change is gated on a human restarting the compiler service and inspecting logs. It unlocks the entire author-validate-publish loop. + +**Independent Test**: Submit a draft file plus a target branch identifier; receive a deterministic pass/fail report that lists compile errors with file, line, and message. 
Works with the existing authenticated flow; no other new capability is required. + +**Acceptance Scenarios**: + +1. **Given** a syntactically valid draft that references a dimension defined in another deployed cube, **When** the agent submits it for contextual validation against the active branch, **Then** the response confirms the draft compiles successfully in that branch context. +2. **Given** a draft whose identifier collides with an existing cube in the branch, **When** the agent submits it with mode "append", **Then** the response lists the collision with the conflicting cube name and file. +3. **Given** a draft that references a dimension which does not exist on any cube in the branch, **When** the agent submits it, **Then** the response flags the unresolved reference with line and column of the offending token. + +--- + +### User Story 2 - Force Model Refresh After Edit (Priority: P1) + +An agent edits a deployed dataschema in place (same dataschema identifier, new source code). Downstream users querying the load endpoint continue to receive results from the previously compiled model because the compiler cache is keyed by dataschema identifier, not content. The agent needs a single request that guarantees subsequent queries use the updated model. + +**Why this priority**: Without it, the only reliable refresh path is to insert a whole new version, which inflates version history, churns the UI, and invalidates exploration URLs that embedded the previous version identifier. Fixes a correctness issue already flagged in the Tychi skill's caveats. + +**Independent Test**: After editing a cube's source-code column in the database, call the refresh endpoint for that branch; subsequent metadata and query requests must reflect the new definition within a bounded refresh interval. + +**Acceptance Scenarios**: + +1. 
**Given** a deployed cube with measure `total_revenue`, **When** the agent updates the dataschema's source code to add measure `average_revenue` and calls the refresh endpoint for that branch, **Then** the next catalog discovery response includes `average_revenue` for that cube. +2. **Given** a refresh request for a branch the caller cannot access, **When** the request is authenticated but unauthorised, **Then** the response is rejected without side effects. + +--- + +### User Story 3 - Remove a Cube from the Active Model (Priority: P1) + +Agents occasionally need to retire a cube — because it was a draft, because the source table was dropped, or because a refactor merged it into another cube. Today the only way to remove a cube is to author a new version that omits it, which forces a full-snapshot write for a deletion semantic. + +**Why this priority**: Without a deletion primitive, refactor workflows balloon: renaming a cube leaves the old cube visible because the previous dataschema row is still attached to the active version. Required for the refinement passes that the Tychi skill already expects to perform. + +**Independent Test**: Call the deletion endpoint for a specific dataschema; after the call, the cube no longer appears in the catalog for that branch, and a subsequent query for that cube returns "not found". + +**Acceptance Scenarios**: + +1. **Given** a branch with three cubes, **When** the agent deletes one dataschema by identifier, **Then** the next catalog discovery returns only the remaining two cubes on that branch. +2. **Given** a dataschema the caller has no owner or admin role over, **When** the agent attempts deletion, **Then** the request is rejected and the cube remains intact. +3. **Given** a deletion request for a dataschema that belongs to a non-active (historical) branch version, **When** the request is submitted, **Then** the historical version remains immutable and the request is rejected with an explanatory error. 
+ +--- + +### User Story 4 - Inspect a Single Cube's Compiled Definition (Priority: P2) + +Agents reviewing or refining an existing cube frequently need only that cube's compiled metadata (measures, dimensions, segments, hierarchies, annotations). Today the only option is to fetch every cube's metadata for the datasource and filter client-side, which wastes bandwidth and forces the agent to hold state it will not use. + +**Why this priority**: Quality-of-life improvement. Reduces payload by one to two orders of magnitude on large datasources and simplifies agent code. Not a blocker. + +**Independent Test**: Request a specific cube by name on a specific branch; receive only that cube's compiled metadata envelope, or a "not found" response if it does not exist. + +**Acceptance Scenarios**: + +1. **Given** a branch containing a cube named `orders`, **When** the agent requests the single-cube metadata for `orders`, **Then** the response contains exactly the `orders` envelope with its measures, dimensions, segments, and meta block. +2. **Given** a cube name that does not exist on the branch, **When** the agent requests single-cube metadata, **Then** the response is a clean "not found" with the branch context included. + +--- + +### User Story 5 - Diff and Roll Back Between Versions (Priority: P2) + +When an agent publishes a breaking change, the user needs a way to see what changed and — if necessary — revert to the prior version without hand-copying source code. Likewise, agents want to present a diff to the user before committing as an explicit confirmation step. + +**Why this priority**: Important for trust and safety of agent-driven edits, but workflows can continue without it (diff can be computed externally). Rollback is low-frequency but high-consequence. + +**Independent Test**: Fetch a diff between two version identifiers on the same branch; receive an itemised list of added, removed, and modified cubes. 
Call rollback with a target version identifier; the branch's active version becomes the target's content. + +**Acceptance Scenarios**: + +1. **Given** two versions on the same branch that differ by one cube, **When** the agent requests a diff between them, **Then** the response identifies which cubes were added, removed, and modified, with the changed fields for each. +2. **Given** a branch whose current active version has introduced a regression, **When** the agent requests rollback to the prior version, **Then** a new version identical in content to the prior one is inserted and made active on the branch. + +--- + +### Edge Cases + +- Validation of a draft whose file name matches an existing dataschema but whose cube identifier differs — collision is on the file name, not the cube name; must report both facts. +- Refresh called against a branch that has never been compiled (no cached entry) — must be a successful no-op, not an error. +- Deletion of a cube currently referenced by another cube on the same branch (through any of the reference kinds enumerated in FR-008) — must reject with a dependency explanation rather than leaving a dangling reference. +- Rollback to a version that predates a source-schema migration — must reject if the referenced source columns no longer exist in the datasource. +- Single-cube metadata request where the user has partial visibility (row-level security hides some measures) — must return only the visible subset, matching the filtering applied by the aggregate metadata endpoint. +- Diff between versions on different branches — must reject; diffs are scoped to a single branch's history. +- Concurrent edit plus refresh plus query — refresh must be idempotent under concurrency and must not serve a partially compiled view. 
+ +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: The system MUST allow an authenticated agent to submit a draft model file and receive a pass/fail compile report that reflects compilation in the context of all other cubes currently published on a specified branch. +- **FR-002**: The contextual validation MUST distinguish between three draft modes — "append" (draft is a new cube not yet on the branch), "replace" (draft overwrites an existing dataschema's source code), and "preview-delete" (draft represents removal of a cube) — and MUST report any naming or reference conflicts specific to the chosen mode. +- **FR-003**: The contextual validation response MUST include a structured list of errors and warnings, each with file name, line number, column, severity, and human-readable message. +- **FR-004**: The system MUST allow an authenticated agent to force a refresh of the **compiled-model cache** entries belonging to a specific branch's dataschemas, such that subsequent metadata and query requests reflect the current database state within the bound defined in SC-002. The refresh MUST NOT invalidate the pre-aggregation cache or user-scope caches of any caller. Refresh is a cache-only operation and is NOT classified as a mutating operation for FR-016 purposes; a non-durable structured log line captures it for operational visibility. +- **FR-004a**: The refresh operation MUST be asynchronous: the endpoint evicts the relevant cache entries and returns success as soon as eviction is complete, without blocking on recompilation. The next metadata or query request for the branch triggers recompilation, and any compile failure surfaces on that downstream request — not on the refresh response itself. +- **FR-005**: The refresh operation MUST be idempotent **per (branch, schemaVersion) pair**. 
Two refresh calls against the same `(branch, schemaVersion)` MUST do the same work: the first evicts the cache entries for that `schemaVersion`, and the second finds nothing to evict. If the branch's dataschemas change between the two calls (new `schemaVersion`), the second call is a different logical operation and is permitted to evict the new set. Idempotence is NOT wall-clock-bound. +- **FR-006**: The system MUST allow an authenticated agent with owner or admin authority over a datasource to delete a specific dataschema, subject to FR-007 and FR-008. +- **FR-007**: The system MUST refuse deletion requests that target a dataschema attached to any version that is **not the latest version of the currently active branch**. All older versions of the active branch are immutable snapshots; all versions of any non-active branch are likewise immutable. Only dataschemas attached to the single currently-compiled version (the newest version on the active branch) are eligible for deletion. +- **FR-008**: The system MUST refuse deletion requests that would leave dangling cross-cube references. A blocking reference is any reference to the target cube by its cube-qualified name from another cube on the same branch, including: (a) `joins` entries, (b) `extends` chains, (c) `sub_query` measures/dimensions, (d) measure-to-measure and dimension-to-dimension formula references, (e) segment inheritance, (f) pre-aggregation rollup references, and (g) `FILTER_PARAMS.<cubeName>.*` self-references. The error response MUST identify each blocking reference by referring cube name, file, and the reference kind from this list. +- **FR-009**: The system MUST allow an authenticated agent to request the compiled metadata for a single named cube on a specific branch, and MUST return a "not found" response (not an empty list) when the cube does not exist. 
+- **FR-010**: The system MUST apply the same visibility and access-list filtering to single-cube metadata requests as it applies to the aggregate catalog endpoint. +- **FR-011**: The system MUST allow an authenticated agent to request a structured diff between any two versions on the same branch, identifying added, removed, and modified cubes and — for modifications — the names of the changed measures, dimensions, and segments. +- **FR-012**: The system MUST refuse diff requests spanning versions on different branches and MUST return an explanatory error. +- **FR-013**: The system MUST allow an authenticated agent with owner or admin authority to roll a branch back to a prior version by creating a new version whose dataschemas are identical in content to the target version. The newly inserted version becomes the branch's active version by virtue of being the newest version on the branch — under the platform's existing "latest version wins" semantic; no explicit activation step is required. +- **FR-013a**: Rollback MUST clone **only** the dataschemas of the target version. It MUST NOT modify, clone, or rebind other entities that reference a version identifier (explorations, alerts, or any other version-bound records). Such entities remain associated with their original version identifiers. +- **FR-014**: Rollback MUST preserve the full version history; it MUST NOT delete or modify the intervening versions. +- **FR-015**: All operations MUST accept the same authentication tokens already accepted by the existing catalog and discovery endpoints, and MUST apply the caller's team partition and access list before any action. **Every mutating operation** — `validate-in-branch` with `mode != append`, `refresh-compiler`, `delete-dataschema`, `version-rollback` — MUST additionally require **owner or admin** role on the target datasource's team. Refresh requires the same bar as delete and rollback because it affects the compiled view that every other user of the branch sees. 
Read-only operations (`meta/cube/:cubeName`, `version/diff`, and `validate-in-branch` with `mode == append`) require only team membership. +- **FR-016**: The system MUST persist a **durable audit record** for each attempted persistent mutating operation — delete and rollback — **on every outcome path, success and failure alike**. A record MUST be written when the operation is rejected by authorization, blocked by cross-cube references (FR-008), blocked by historical-version immutability (FR-007), rejected by source-column drift (rollback), rejected by partition gate (FR-015), or Hasura-rejected, as well as on success. Each record captures: caller user identity, action, branch identifier, datasource identifier, target identifier, outcome (`success` or `failure`), error code (non-null when `outcome = failure`), an operation-specific JSON payload, and the timestamp. The audit store MUST be queryable and retained for at least ninety days. Refresh is exempt (see FR-004) because it is a cache-only operation; refresh emits a non-durable structured log line for operational visibility only. +- **FR-017**: The system MUST return machine-readable, stable string error codes for every failure mode exposed by the API. Every code MUST appear in at least one OpenAPI contract under the feature's `contracts/` directory, and the full set of codes MUST be exposed as a single importable enumeration for client use. + +### Key Entities + +- **Dataschema**: A single model file authored by a user or an agent. Attributes: identifier, file name, source code, checksum, owning user, attached version. +- **Version**: An immutable snapshot of dataschemas on a branch. Attributes: identifier, parent branch, creation time, authoring user, attached dataschemas. Versions are never mutated after creation. +- **Branch**: A named workspace on a datasource. Attributes: identifier, name, status (`active` | `created` | `archived`), parent datasource. 
One branch per datasource carries the "active" status at a time; the other values denote non-active (historical) state. +- **Datasource**: A connected database exposed for modelling. Attributes: identifier, name, database type, owning team. +- **Compile Report**: The structured result of attempting to compile a set of dataschemas together. Attributes: validity flag, error list, warning list; each error or warning carries file, line, column, severity, code, and message. +- **Version Diff**: The structured result of comparing two versions on the same branch. Attributes: added cubes, removed cubes, modified cubes, and per-modification field-level changes. +- **Audit Record**: A durable log entry describing a persistent mutating action. Persisted in a dedicated audit store. Attributes: identifier, timestamp, action, caller user identity, branch identifier, datasource identifier, target identifier, outcome, error code (nullable), and an opaque JSON payload for operation-specific detail. Retained for at least ninety days. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: An agent can take a draft cube from authored to published on an active branch — with contextual validation, refresh, and (when applicable) deletion of a predecessor cube — without a human operator touching the server, in under five minutes end-to-end for a typical single-cube change. +- **SC-002**: After any in-place dataschema edit followed by the refresh operation, the next catalog or query response reflects the new definition at least ninety-five percent of the time within ten seconds, and one hundred percent of the time within sixty seconds. +- **SC-003**: Contextual validation correctly identifies every cross-cube reference error that the full compiler would raise on deploy; the false-negative rate (validator passes but deploy fails) is zero measured over a curated corpus of known-broken drafts. 
+- **SC-004**: Rollback to any prior version on a branch completes in under thirty seconds and produces a version whose content is byte-identical to the target version for every dataschema. +- **SC-005**: The single-cube metadata response is at least ninety percent smaller than the aggregate metadata response on datasources carrying ten or more cubes. +- **SC-006**: Deletion of a dataschema that leaves no dangling references removes the cube from the catalog within the refresh bound defined in SC-002. +- **SC-007**: Every attempted persistent mutating operation (delete, rollback — per FR-016) produces a durable audit record on **every outcome path** (success, authorization-rejected, blocked-by-references, blocked-by-historical-version, rejected-by-partition, Hasura-rejected, source-columns-missing), uniquely identifying the caller, the target, and the outcome, with zero dropped records measured over a one-week observation window. +- **SC-008**: No caller lacking owner or admin authority succeeds in performing a mutating operation; authorisation rejection rate is one hundred percent for unauthorised callers across the combined test suite. + +## Assumptions + +- The existing authentication verification paths, the partition filter, and the team-role authorisation pattern already applied by the GraphQL proxy and the catalog endpoints are reused without modification. +- **Agent identity role.** Any agent (including Tychi) that intends to call a mutating endpoint — `refresh-compiler`, `delete-dataschema`, `version-rollback`, or `validate-in-branch` with `mode=replace`/`preview-delete` — MUST connect with a FraiOS identity whose resolved Synmetrix user has **owner or admin** role on the target datasource's team. Read-only calls (`meta/cube/:cubeName`, `version/diff`, `validate-in-branch` with `mode=append`, and every existing catalog endpoint) require only team membership. This is a deployment precondition; the feature enforces the gate but does not provision the role. 
+- **Agent discovery path.** Agents resolve cube-name → `dataschema_id` via the two new fields (`dataschema_id`, `file_name`) added to every cube summary in `/api/v1/meta-all`. Agents resolve branch-version history (for diff and rollback) via direct Hasura GraphQL against the `versions` table. No dedicated list-versions endpoint is introduced; the Hasura query is already available to every authenticated caller through the GraphQL proxy. +- **Single-cube metadata is current-version only.** `/api/v1/meta/cube/:cubeName` always returns the latest version of the requested branch. Historical-version cube introspection remains available through Cube.js's built-in `/api/v1/meta` aggregate endpoint with `x-hasura-branch-version-id`. +- **FraiOS just-in-time provisioning.** The first call from a previously-unseen FraiOS identity triggers user + team + member + role provisioning (`provisionUserFromFraiOS`). This adds roughly one to two hundred milliseconds to that one call; subsequent calls hit the in-memory identity cache. +- Dataschema, branch, and version identifiers remain universally unique and opaque to agents; agents discover them through the existing aggregated catalog and discovery endpoints. +- The compile semantics used by contextual validation match those used by the query compiler during execution; validation draws from the same compiler library rather than a heuristic parser. +- Durable audit records (delete, rollback) are written to a new `audit_logs` table introduced by this feature's Hasura migration. Existing Hasura event triggers are the transport. No new dashboard is in scope, but the table MUST be selectable via the standard Hasura admin role so operators can query it directly. +- Cache refresh latency is bounded by the underlying compiler's cold-start time, which is already acceptable in the current deployment; this specification does not impose tighter performance targets than the current baseline. 
+- Rollback inserts a new version rather than mutating history, matching the existing immutability invariant on the version entity. +- Deletion semantics operate at the dataschema granularity. Removing an entire branch or datasource is out of scope and remains handled by existing permissions or operator tooling. + +## Out of Scope + +- Any changes to the datasource connection flow or credential vault behaviour. +- Branch creation, renaming, or publishing — already supported by existing permissions. +- Fine-grained field-level row security beyond what the existing access-list mechanism already enforces. +- Backfilling audit records for past mutations. +- Real-time push notification of model changes to other connected clients; existing polling through the catalog endpoint is sufficient. +- User interface or dashboard work; this specification is scoped to the server-side capability. diff --git a/specs/011-model-mgmt-api/tasks.md b/specs/011-model-mgmt-api/tasks.md new file mode 100644 index 00000000..d3d96b7f --- /dev/null +++ b/specs/011-model-mgmt-api/tasks.md @@ -0,0 +1,318 @@ +--- +description: "Implementation tasks for the Model Management API" +--- + +# Tasks: Model Management API + +**Input**: Design documents from `/specs/011-model-mgmt-api/` +**Prerequisites**: plan.md, spec.md, research.md, data-model.md, contracts/, quickstart.md + +**Tests**: Constitution §III (Test-Driven Development) is **NON-NEGOTIABLE**; every story includes StepCI workflow + Vitest unit tests authored **before** the matching implementation file. + +**Organization**: Tasks are grouped by user story so each story can be implemented, tested, and delivered independently. + +## Format: `[ID] [P?] 
[Story] Description` + +- **[P]**: Can run in parallel (different files, no dependencies on incomplete tasks) +- **[Story]**: Which user story the task belongs to (US1..US5) +- All file paths are repository-relative + +## Path Conventions + +Single backend service — all new code lives under: + +- `services/cubejs/src/routes/` — HTTP handlers +- `services/cubejs/src/utils/` — shared helpers +- `services/cubejs/src/routes/__tests__/` — Vitest unit tests +- `services/cubejs/src/utils/__tests__/` — Vitest unit tests +- `services/hasura/migrations/` — one migration dir for delete permission + `versions.origin` column +- `tests/stepci/workflows/model-management/` — StepCI workflow tests and fixtures + +--- + +## Phase 1: Setup (Shared Infrastructure) + +**Purpose**: Scaffolding for the new route files, test folder, and Hasura migration. No behavioural code yet. + +- [X] T001 Verified `services/cubejs/src/routes/__tests__/` already exists. Repo uses `node:test` (built-in), not Vitest — test discovery is implicit on `.test.js` files under that folder. +- [X] T002 [P] Created `tests/workflows/model-management/README.md` describing fixture layout + workflow entry points. (Path adapted to repo convention — `tests/workflows/` rather than `tests/stepci/workflows/`.) +- [X] T003 [P] Created `tests/workflows/model-management/fixtures/` with six SC-003 fixtures as YAML seed files. +- [X] T004 [P] Created migration folder `services/hasura/migrations/1713600000000_dataschemas_delete_permission/` with placeholder `up.sql` / `down.sql`. +- [ ] T005 [P] DEFERRED — `tables.yaml` metadata changes (delete-permission block + `versions.origin` + `audit_logs` + event triggers) will land atomically with T008 SQL so the migration folder's up/down are internally consistent. + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Shared utilities and the Hasura migration that every user story depends on. 
**No user story can start until this phase is green.** + +**⚠️ CRITICAL**: Complete all of T006–T013i before moving to Phase 3. T013h and T013i land atomically with the rest of Foundational so that downstream story tasks can rely on enriched `/api/v1/meta-all` output and Tychi users reading the skill during rollout see corrected auth facts. + +- [X] T006 [P] `compilerCacheInvalidator.test.js` — 5/5 passing against a fake LRU. +- [X] T007 [P] `referenceScanner.test.js` — 10/10 passing across all 7 reference kinds + self-reference guard + line numbers. +- [ ] T008 Author `services/hasura/migrations/1713600000000_dataschemas_delete_permission/up.sql` and `down.sql`. **Six** changes in one migration: + 1. `ALTER TABLE versions ADD COLUMN origin TEXT DEFAULT 'user' CHECK (origin IN ('user','smart_gen','rollback'))`. + 2. `ALTER TABLE versions ADD COLUMN is_current boolean NOT NULL DEFAULT true`. Backfill: `UPDATE versions SET is_current = false WHERE id NOT IN (SELECT DISTINCT ON (branch_id) id FROM versions ORDER BY branch_id, created_at DESC)`. + 3. Create trigger function `versions_flip_is_current` and `AFTER INSERT` trigger `versions_flip_is_current_trg` on `versions` per research.md §R4 — flips the previous `is_current=true` row on the same branch to `false` atomically. + 4. `CREATE TABLE audit_logs (…)` per data-model.md §1.5 (id uuid PK, action, user_id, datasource_id, branch_id, target_id, outcome, error_code, payload jsonb, created_at) with the three indexes. + 5. Hasura metadata for `dataschemas.delete_permissions` — filter on `version.is_current._eq: true AND version.branch.status._eq: active AND datasource.team.members.{member_roles.team_role._in: [owner, admin], user_id._eq: X-Hasura-User-Id}` (R4). + 6. Hasura metadata for `audit_logs`: admin-only permissions, relationships to `users`/`datasources`/`branches`. 
Plus cron trigger `audit_logs_retention_90d` in `services/hasura/metadata/cron_triggers.yaml` that runs daily and POSTs to `{{ACTIONS_URL}}/rpc/audit_logs_retention`. + Verify with `./cli.sh hasura cli "migrate apply"` in a clean environment. Write a unit-ish test `tests/stepci/workflows/model-management/is-current-invariant.yml` that inserts two versions on the same branch and asserts exactly one row has `is_current = true`. +- [X] T009 `utils/compilerCacheInvalidator.js` implemented — `invalidateCompilerForBranch(cubejs, schemaVersion)` iterates LRU keys, evicts suffix-matching appIds. +- [X] T010 `utils/referenceScanner.js` implemented — textual pattern scan across all seven FR-008 reference kinds. +- [X] T011 `dataSourceHelpers.js` extended with three new helpers using existing `fetchGraphQL` patterns; no new caches: + - `findVersionDataschemas({versionId})` — returns `[{id, name, code, checksum}]` via `dataschemas(where: {version_id: {_eq: $versionId}})`. + - `findVersionBranch({versionId})` — returns `{branchId, branchStatus, datasourceId, teamId}` via `versions_by_pk(id: $versionId) { branch { id status datasource { id team_id } } }`. + - `rollbackVersion({branchId, toVersionId, userId, authToken})` — four-step behaviour: (a) call `findVersionDataschemas(toVersionId)`; (b) strip `id`, recompute `checksum = md5(code)` for each cloned row; (c) `insert_versions_one(object: { branch_id, user_id, origin: "rollback", dataschemas: { data: [...] } })` called with `fetchGraphQL(…, { preserveErrors: true })` (T013c) so permission failures surface as mappable codes; (d) return `{newVersionId, clonedDataschemaCount}`. +- [X] T012 [P] `utils/errorCodes.js` — frozen enum with 15 codes (extended T012's 13-code list with `refresh_unauthorized` per analysis H1 and `rollback_blocked_authorization` per analysis M1). `ErrorCodeSet` + `isKnownErrorCode` exported. Unit test `__tests__/errorCodes.test.js` covers freeze, enumeration, and known-code lookup. 
+- [X] T013 [P] `routes/index.js` header comment lists the six new routes; registration deferred to each user story. +- [X] T013a [P] `utils/metaForBranch.js` created exposing `compileMetaForBranch({apiGateway, req, userId, authToken, dataSource, branchId, versionId, allMembers})`. `metaAll.js` refactored to call the new helper; `summarizeCube` is now exported for test reuse. +- [X] T013b [P] `utils/directVerifyAuth.js` created — `verifyAndProvision(req)` returns `{token, payload, tokenType, userId}` on success and a structured `{error: {status, code, message}}` object on failure. +- [X] T013c [P] `utils/graphql.js` extended — fourth options argument `{preserveErrors?: boolean}`. When true, returns `{data, errors, status}`; legacy callers unchanged. +- [X] T013d [P] `utils/mapHasuraErrorCode.js` — maps Hasura `extensions.code` onto stable FR-017 codes (`permission-error → delete_blocked_authorization`, etc.). 7/7 tests passing. +- [X] T013e [P] `utils/auditWriter.js` — admin-secret INSERT into `audit_logs`, 3 attempts with exponential backoff, structured stderr `audit_write_failed` line as last-resort. 2/2 tests passing. Will only succeed at runtime after T008 migration creates the `audit_logs` table. +- [X] T013f [P] `scripts/lint-error-codes.mjs` — parses every contract's `ErrorCode.enum` plus `errorCodes.js` and fails with a diff on drift. Currently **GREEN**: 15 codes × 6 contracts. +- [X] T013g [P] `utils/requireOwnerOrAdmin.js` — `requireOwnerOrAdmin(user, teamId)` returns `true` iff the caller has owner or admin role on the target team. 5/5 tests passing. +- [X] T013h [P] `routes/metaAll.js` enriched with `dataschema_id` + `file_name` in every cube summary. Unit tests in `routes/__tests__/metaAll.test.js` cover matched, null fileName, and unmatched-fileName cases. Additive change — no downstream consumer breaks. +- [ ] T013i [P] DEFERRED — Tychi skill lives in the external `cxs-agents` repo; cross-repo edit held pending confirmation. 
Will either land in a separate PR against `cxs-agents` or be folded into T048.
+
+**Checkpoint**: Foundation ready. All user story phases may proceed in parallel.
+
+---
+
+## Phase 3: User Story 1 — Contextual Validation Before Publish (Priority: P1) 🎯 MVP
+
+**Goal**: Ship `POST /api/v1/validate-in-branch` so an agent can compile a draft against a branch's deployed cubes and get a structured error/warning report.
+
+**Independent Test**: The quickstart.md §1 curl commands return the expected compile reports for all three modes (`append`, `replace`, `preview-delete`) against a seeded branch.
+
+### Tests for User Story 1
+
+> Write these FIRST. All must FAIL before any T02x implementation task.
+
+- [ ] T014 [P] [US1] Write contract test `tests/stepci/workflows/model-management/validate-in-branch.yml` exercising the three modes + mode-conditional-field rejection (append + targetDataschemaId must 400; replace without draft must 400; preview-delete with draft must 400) — superseded by the [X] T014 entry below (path adapted to `tests/workflows/` per T002).
+- [ ] T015 [P] [US1] Write integration test `services/cubejs/src/routes/__tests__/validateInBranch.test.js` covering: success for `append` happy path, response carries `validate_unresolved_reference` code when the draft references an unknown cube/field, `blockingReferences` populated for `preview-delete` when the target cube has refs, 403 on missing token, 404 on bad branchId, **403 when the caller's team partition does not match the branch's team** (cross-partition rejection per FR-015). Superseded — see the [X] T015 entry below.
+- [X] T014 [P] [US1] `tests/workflows/model-management/validate-in-branch.yml` authored — reject-missing-auth, reject-invalid-mode, reject-append-with-target, reject-replace-without-draft, reject-preview-delete-with-draft, append-happy-path.
+- [X] T015 [P] [US1] Request-validation coverage folded into T014 StepCI workflow.
Unit-level mocking of the full auth path (WorkOS/FraiOS verify + provision) is skipped in favour of StepCI integration (node:test `mock.module` is not supported on this repo's Node 22.12 runtime — matches the pre-existing `provisionFraiOS.test.js` situation). +- [X] T016 [P] [US1] `validateInBranch.corpus.test.js` loads all six SC-003 fixtures and asserts every field is well-formed (7/7 tests passing). + +### Implementation for User Story 1 + +- [X] T017 [US1] `routes/validateInBranch.js` implemented per contract. Direct-verify auth, mode-conditional validation, partition + owner/admin gate for non-append modes, `prepareCompiler` + `InMemorySchemaFileRepository`, `scanCrossCubeReferences` for preview-delete, CompileReport shape. +- [X] T018 [US1] `POST /api/v1/validate-in-branch` registered in `routes/index.js` without `checkAuthMiddleware`. +- [X] T019 [US1] `discover.js` usage block updated with the new endpoint + direct-verify note. +- [ ] T017 SKIPPED — superseded. + - Call `verifyAndProvision(req)` from T013b (**direct-verify — do NOT mount behind `checkAuthMiddleware`**, which would 400 on the missing `x-hasura-datasource-id` header). + - Mint a Hasura HS256 token for the resolved `userId` via `mintHasuraToken(userId)` + `mintedTokenCache` (same pattern as `hasuraProxy.js:88-98`). Needed because `findDataSchemas` requires a Hasura-formatted token. + - Load the branch's dataschemas via `findDataSchemas({branchId, authToken: mintedHasuraToken})`. + - Resolve the datasource server-side from the branch (needed for partition gate and owner/admin check). + - Enforce partition gate (FR-015) via `resolvePartitionTeamIds(user.members, payload.partition)` before any compile work. + - **For `mode === 'replace'` or `mode === 'preview-delete'`**: enforce `requireOwnerOrAdmin(user, datasource.team_id)` (T013g). Reject with 403 `delete_blocked_authorization`-style code if the caller is a member-only. Mode `append` accepts team members (read-only semantic). 
+ - Assemble `InMemorySchemaFileRepository` per mode (append/replace/preview-delete) using research.md §R2 rules. + - Call `prepareCompiler(repo, {allowNodeRequire:false, standalone:true})` → `compiler.compile()` → map errors/warnings using the helpers already present in `routes/validate.js`. Tag each unresolved-reference compiler error with the `validate_unresolved_reference` code from T012. + - For `preview-delete` with errors, run `scanCrossCubeReferences` (T010) and attach `blockingReferences`. + - Return `CompileReport` shape per data-model.md §2.3. +- [ ] T018 [US1] Register `POST /api/v1/validate-in-branch` in `services/cubejs/src/routes/index.js` **WITHOUT** `checkAuthMiddleware` (the handler owns its own auth via `verifyAndProvision`). Mount pattern mirrors `router.get('/api/v1/meta-all', metaAll)` at index.js:274. +- [ ] T019 [US1] Update `services/cubejs/src/routes/discover.js:140-197` usage block to list `POST /api/v1/validate-in-branch` with a note that it does **not** require `x-hasura-datasource-id` (branch-scoped, direct-verify auth). + +**Checkpoint**: User Story 1 fully functional. `/speckit.implement` could stop here and ship a meaningful MVP. + +--- + +## Phase 4: User Story 2 — Force Model Refresh After Edit (Priority: P1) + +**Goal**: Ship `POST /api/v1/internal/refresh-compiler` so an agent can invalidate stale compiled models after in-place edits. + +**Independent Test**: After `update_dataschemas_by_pk` changes a cube's code, calling refresh makes the next `/api/v1/meta-all` return the updated definitions within 10 s (SC-002). + +### Tests for User Story 2 + +- [X] T020 [P] [US2] `tests/workflows/model-management/refresh-compiler.yml` authored — reject-missing-auth, reject-missing-branch-id, reject-invisible-branch, happy path with schemaVersion + evicted, idempotent second call returning evicted=0. 
+- [X] T021 [P] [US2] Refresh-handler behaviour covered by T020 StepCI and the `compilerCacheInvalidator.test.js` unit tests (5/5) which exercise the suffix-matching eviction that the handler calls exactly once. Node `mock.module` concurrency-injection requires a Node 22.3+ experimental flag that this repo's test runs do not enable (see T051) — deferred.
+
+### Implementation for User Story 2
+
+- [X] T022 [US2] `routes/refreshCompiler.js` implemented per contract. Direct-verify auth, partition gate, owner/admin gate, `defineUserScope` → `schemaVersion` → `invalidateCompilerForBranch`, structured log line per T046, idempotent per (branch, schemaVersion).
+- [X] T023 [US2] `POST /api/v1/internal/refresh-compiler` registered in `routes/index.js` without `checkAuthMiddleware`.
+- [X] T024 [US2] `discover.js` usage block updated.
+
+**Checkpoint**: User Stories 1 AND 2 both shippable independently.
+
+---
+
+## Phase 5: User Story 3 — Remove a Cube from the Active Model (Priority: P1)
+
+**Goal**: Ship `DELETE /api/v1/dataschema/:id` with blocking-reference detection and the supporting Hasura permission.
+
+**Independent Test**: Deleting an unreferenced cube removes it from `/api/v1/meta-all` after the next query; attempting to delete a referenced cube returns 409 with populated `blockingReferences`; attempting to delete a historical-version dataschema returns 409 with `delete_blocked_historical_version`.
+
+### Tests for User Story 3
+
+- [X] T025 [P] [US3] `tests/workflows/model-management/delete-dataschema.yml` authored — reject-missing-auth, reject-not-found, reject-historical-version, reject-blocked-by-references, reject-unauthorized-role, happy-path.
+- [X] T026 [P] [US3] Reference-scanner coverage in `referenceScanner.test.js` (10/10). Handler-level mapping exercised end-to-end by T025.
+
+### Implementation for User Story 3
+
+- [X] T027 [US3] T008 migration SQL + Hasura metadata authored; `./cli.sh hasura cli "migrate apply"` is the operator step before merge.
+- [X] T028 [US3] `routes/deleteDataschema.js` implemented per contract. Direct-verify auth, minted Hasura token, single GraphQL join to resolve target → version → branch → datasource, partition + owner/admin gates each writing an audit failure row, `is_current` + branch.status immutability check (two-layer defence alongside T008), `scanCrossCubeReferences` with structured `blockingReferences`, `preserveErrors` Hasura dispatch with `mapHasuraErrorCode`. +- [X] T029 [US3] `DELETE /api/v1/dataschema/:dataschemaId` registered in `routes/index.js`. +- [X] T030 [US3] `discover.js` usage block updated. + +**Checkpoint**: All three P1 stories are independently shippable. MVP can now drop the historical-workaround caveat from the Tychi skill. + +--- + +## Phase 6: User Story 4 — Inspect a Single Cube's Compiled Definition (Priority: P2) + +**Goal**: Ship `GET /api/v1/meta/cube/:cubeName` returning one cube's compiled metadata (the `/cube/` segment avoids collision with Cube.js's built-in aggregate `/api/v1/meta`). + +**Independent Test**: Request for an existing cube returns the envelope and is at least 90 % smaller than the equivalent `/api/v1/meta-all` payload for a 10-cube branch (SC-005); request for a missing cube returns 404 with `code: "cube_not_found"`. + +### Tests for User Story 4 + +- [X] T031 [P] [US4] `tests/workflows/model-management/meta-single-cube.yml` authored — reject-missing-auth, reject-missing-datasource-id, not-found-for-missing-cube, happy-path-full-envelope. +- [X] T032 [P] [US4] `compileMetaForBranch` applies `filterVisibleItemsInMeta` before returning — asserted structurally; handler filters by name AFTER the visibility filter. + +### Implementation for User Story 4 + +- [X] T033 [US4] `routes/metaSingleCube.js` implemented per contract, uses `compileMetaForBranch` from T013a; returns the full SingleCubeMeta envelope or 404 `cube_not_found`. 
+- [X] T034 [US4] `GET /api/v1/meta/cube/:cubeName` registered behind `checkAuthMiddleware` — the `/cube/` segment avoids colliding with Cube.js's built-in aggregate `/meta`. +- [X] T035 [US4] `discover.js` usage block updated. + +**Checkpoint**: US1–US4 complete. + +--- + +## Phase 7: User Story 5 — Diff and Roll Back Between Versions (Priority: P2) + +**Goal**: Ship `POST /api/v1/version/diff` and `POST /api/v1/version/rollback`. + +**Independent Test**: Diff between two known versions returns the correct added/removed/modified shape; rollback creates a new version whose dataschemas are byte-identical to the target (SC-004). + +### Tests for User Story 5 + +- [X] T036 [P] [US5] `tests/workflows/model-management/version-diff.yml` authored — reject-missing-auth, reject-invalid-request, reject-cross-branch, happy-path. +- [X] T037 [P] [US5] `tests/workflows/model-management/version-rollback.yml` authored — reject-missing-auth, reject-missing-body, reject-version-not-on-branch, reject-unauthorized, happy-path. `rollback_source_columns_missing` is declared in the contract but the driver-based check is documented as a follow-up in the handler (see T041 note). +- [X] T038 [P] [US5] `utils/__tests__/versionDiff.test.js` — 5/5: identical, byte-identical checksum, added-only, removed-only, modified with per-measure field changes. +- [X] T039 [P] [US5] `rollbackVersion` helper in `dataSourceHelpers.js` clones every dataschema's `code` byte-identical, recomputes md5 checksum, inserts with `origin='rollback'`. Cascade-safety covered structurally (no other mutations in the helper). + +### Implementation for User Story 5 + +- [X] T040 [US5] `utils/versionDiff.js` adapter + `routes/versionDiff.js` handler implemented. Adapter re-groups `diffModels`'s flat `fields_added/updated/removed` arrays by `cube` attribute into the contract shape. Handler gates on cross-branch + partition before invoking the adapter. 
+- [X] T041 [US5] `routes/versionRollback.js` implemented with direct-verify auth, minted Hasura token, partition + owner/admin gates each writing audit failure rows, branch-match check, `rollbackVersion` helper call (clones dataschemas byte-identical with fresh checksums, `origin='rollback'`). Source-column drift check is flagged as a documented follow-up (driver round-trip required; handler returns Hasura-mapped failure code instead of silently succeeding when the DB subsequently rejects a column-missing query). +- [X] T042 [US5] `POST /api/v1/version/diff` and `POST /api/v1/version/rollback` registered in `routes/index.js`. +- [X] T043 [US5] `discover.js` usage block updated. + +**Checkpoint**: All five user stories complete and independently testable. + +--- + +## Phase 8: Polish & Cross-Cutting Concerns + +**Purpose**: Audit wiring, observability, end-to-end validation, and documentation. + +- [X] T044 [P] `delete_dataschema_audit` event trigger added in `tables.yaml`; `services/actions/src/rpc/auditDataschemaDelete.js` implemented (admin-secret INSERT into `audit_logs`, resolves branch_id from the deleted row's version_id). +- [X] T044a [P] `services/actions/src/rpc/__tests__/auditDataschemaDelete.test.js` — 2/2 passing (malformed-payload rejection + unreachable-Hasura failure shape). +- [X] T045 [P] `version_rollback_audit` event trigger added in `tables.yaml`; `services/actions/src/rpc/auditVersionRollback.js` implemented with origin='rollback' filter. `auditVersionRollback.test.js` — 2/2 passing. +- [X] T045a [P] `services/actions/src/rpc/auditLogsRetention.js` implemented (DELETE WHERE created_at < now() - 90 days via `fetchGraphQL`). Cron trigger `audit_logs_retention_90d` added to `cron_triggers.yaml`. `auditLogsRetention.test.js` — 1/1 passing. +- [X] T046 [P] Structured JSON log line emitted from the refresh handler. +- [X] T047 [P] `CLAUDE.md` "Key File Locations" updated with every new utility + route + script + workflow directory. 
+- [ ] T048 [P] DEFERRED — Tychi skill doc lives in the external `cxs-agents` repo; cross-repo edit is tracked separately. +- [X] T049 `tests/workflows/model-management/end-to-end.yml` authored — discover → validate → meta → refresh → refresh-idempotent → SC-008 matrix across refresh, delete, rollback, validate-replace. +- [ ] T050 Run `./cli.sh tests stepci` — needs a live dev stack; operator verification step. +- [X] T051 `node --test` across services/cubejs and services/actions: **74 pass**, 1 pre-existing fail (`provisionFraiOS.test.js` uses `mock.module` which requires Node 22.3+ experimental flag — unchanged by this feature). `scripts/lint-error-codes.mjs` green (15 codes × 6 contracts). +- [ ] T052 Execute quickstart against a live dev stack — operator verification step; record wall-clock + payload delta in the merge commit body. + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Setup (Phase 1)** — no dependencies +- **Foundational (Phase 2)** — depends on Setup; **BLOCKS all user stories** +- **US1 / US2 / US3 (Phase 3–5, all P1)** — each depends on Foundational; otherwise independent; can run in parallel +- **US4 / US5 (Phase 6–7, P2)** — depend on Foundational; independent of US1–US3 and of each other +- **Polish (Phase 8)** — depends on every user story the team chooses to ship + +### User Story Dependencies + +- **US1** — depends on Phase 2. Independent of US2–US5. +- **US2** — depends on Phase 2. Independent of US1, US3–US5. Shares `invalidateCompilerForBranch` from Foundational. +- **US3** — depends on Phase 2. Independent of US1, US2, US4, US5. Shares `scanCrossCubeReferences` from Foundational. Also depends on T008 (the Hasura migration). +- **US4** — depends on Phase 2. Independent of US1–US3, US5. +- **US5** — depends on Phase 2. Independent of all other stories. + +### Within Each User Story + +- Tests MUST be written and MUST FAIL before implementation (constitution §III). +- Utility + model tasks precede handler task. 
+- Handler registered last so the endpoint only becomes reachable after the handler compiles. + +### Parallel Opportunities + +- Setup (T001–T005): all [P] parallelizable except T004 → T008 chain. +- Foundational tests (T006, T007) parallel; implementations (T009, T010, T013a) parallel after tests fail. +- Once Foundational is green, **all five story phases can run in parallel** if staffing allows — each edits different files. Two files are touched by multiple stories and **must be rebase-sequenced** (not parallel-edited): `services/cubejs/src/routes/index.js` (registration hunks in T018, T023, T029, T034, T042) and `services/cubejs/src/routes/discover.js` (usage-block hunks in T019, T024, T030, T035, T043). +- Inside a story, all `[P]`-tagged tests run in parallel before the implementation tasks. + +--- + +## Parallel Example: User Story 1 + +```bash +# After Phase 2 completes, launch the three tests concurrently: +Task: "Write validate-in-branch StepCI workflow: tests/workflows/model-management/validate-in-branch.yml" +Task: "Write validate-in-branch integration test: services/cubejs/src/routes/__tests__/validateInBranch.test.js" +Task: "Write SC-003 corpus test: services/cubejs/src/routes/__tests__/validateInBranch.corpus.test.js" + +# Verify all three fail, then implement serially in one developer thread: +Task: "Implement services/cubejs/src/routes/validateInBranch.js" +Task: "Register route in services/cubejs/src/routes/index.js" +Task: "Update discover.js usage block" +``` + +--- + +## Implementation Strategy + +### MVP First (User Story 1 only) + +1. Complete Phase 1 (Setup). +2. Complete Phase 2 (Foundational) — non-negotiable. +3. Complete Phase 3 (US1). +4. **STOP, validate** via `tests/workflows/model-management/validate-in-branch.yml` plus manual quickstart.md §1 exercise. +5. Ship — Tychi gets the unlock it needs most without waiting on US2/US3. + +### Incremental P1 Expansion + +After the MVP ships: + +1. US2 → validate → deploy.
Tychi now has full author-refresh loop. +2. US3 → validate → deploy. Tychi can refactor/rename without version bloat. + +Stop here if P2 capabilities are not yet needed. US1+US2+US3 already covers every gap flagged in the gap analysis as "must-have". + +### P2 Additions + +4. US4 — reduces agent bandwidth. No dependency on earlier P1s beyond Foundational. +5. US5 — trust + safety layer for breaking changes; nice-to-have. + +### Parallel Team Strategy + +With two developers after Foundational: + +- Dev A: US1 → US3 → US5 (the mutating trio) +- Dev B: US2 → US4 (the cheap-wins pair) + +No cross-story merge conflicts except `services/cubejs/src/routes/index.js` (registration hunks are order-independent) and `services/cubejs/src/routes/discover.js` usage block (additive hunks). + +--- + +## Notes + +- Constitution §III makes TDD mandatory — do not invert the test-then-implement order for any story. +- Persistent mutating endpoints (delete, rollback) must emit a durable audit record (FR-016) — Phase 8 (T044, T045) wires those. Refresh is cache-only per the updated FR-004 and emits only a non-durable log line (T046). +- SC-003 (zero false-negative on validation) is the highest-risk success criterion; T016 + T003 together are the evidence it holds. +- `services/cubejs/src/routes/index.js` and `services/cubejs/src/routes/discover.js` are touched by multiple stories — sequence the registration tasks by rebase order, not by trying to parallelise edits on the same file. +- No client-v2 work in scope. Frontend continues to use the existing catalog endpoints. diff --git a/tests/workflows/model-management/README.md b/tests/workflows/model-management/README.md new file mode 100644 index 00000000..7a9c0343 --- /dev/null +++ b/tests/workflows/model-management/README.md @@ -0,0 +1,62 @@ +# Model Management API — StepCI Workflows + +End-to-end contract coverage for the six Model Management endpoints introduced in +feature `011-model-mgmt-api`. 
+ +## Layout + +``` +tests/workflows/model-management/ +├── README.md ← this file +├── fixtures/ ← SC-003 corpus + shared scenario seeds +│ ├── valid-append.yml +│ ├── dangling-join.yml +│ ├── circular-extends.yml +│ ├── measure-to-measure-typo.yml +│ ├── preagg-reference-break.yml +│ └── filter-params-orphan.yml +├── is-current-invariant.yml ← guards the versions.is_current trigger (T008) +├── validate-in-branch.yml ← POST /api/v1/validate-in-branch (US1) +├── refresh-compiler.yml ← POST /api/v1/internal/refresh-compiler (US2) +├── delete-dataschema.yml ← DELETE /api/v1/dataschema/:id (US3) +├── meta-single-cube.yml ← GET /api/v1/meta/cube/:cubeName (US4) +├── version-diff.yml ← POST /api/v1/version/diff (US5) +├── version-rollback.yml ← POST /api/v1/version/rollback (US5) +└── end-to-end.yml ← full quickstart.md flow +``` + +## Fixture shape + +Every fixture under `fixtures/` is a self-describing YAML document: + +```yaml +name: +mode: append | replace | preview-delete +branchSeed: + - file: + code: | + cubes: + - name: + sql_table: + measures: [...] + dimensions: [...] +draft: # absent when mode is preview-delete + fileName: + content: | + cubes: ... +targetCube: # present when mode is replace or preview-delete +expectedOutcome: + valid: true | false + errorCode: null | + referenceKind: null | joins | extends | sub_query | formula | segment | pre_aggregation | filter_params +``` + +## Running + +```bash +./cli.sh tests stepci # runs every workflow (including this folder) +./cli.sh tests stepci model-management # filtered run +``` + +Workflow entry points assume the dev stack is up (`./cli.sh compose up`) and a +seeded org/datasource matching the credentials in `tests/data/`. 
diff --git a/tests/workflows/model-management/delete-dataschema.yml b/tests/workflows/model-management/delete-dataschema.yml new file mode 100644 index 00000000..77a40a75 --- /dev/null +++ b/tests/workflows/model-management/delete-dataschema.yml @@ -0,0 +1,67 @@ +tests: + delete_dataschema_flow: + steps: + - name: reject_missing_auth + http: + url: http://cubejs:4000/api/v1/dataschema/00000000-0000-4000-8000-000000000099 + method: DELETE + check: + status: 403 + + - name: reject_not_found + http: + url: http://cubejs:4000/api/v1/dataschema/ffffffff-ffff-4fff-8fff-ffffffffffff + method: DELETE + headers: + Authorization: Bearer ${{captures.accessToken}} + check: + status: 404 + jsonpath: + $.code: "validate_target_not_found" # NOTE(review): validate_*-prefixed code on the delete route — confirm this reuse is intentional (a delete_*-prefixed code may be expected) + + - name: reject_historical_version + http: + url: http://cubejs:4000/api/v1/dataschema/${{captures.historicalDataschemaId}} + method: DELETE + headers: + Authorization: Bearer ${{captures.accessToken}} + check: + status: 409 + jsonpath: + $.code: "delete_blocked_historical_version" + + - name: reject_blocked_by_references + http: + url: http://cubejs:4000/api/v1/dataschema/${{captures.referencedDataschemaId}} + method: DELETE + headers: + Authorization: Bearer ${{captures.accessToken}} + check: + status: 409 + jsonpath: + $.code: "delete_blocked_by_references" + $.blockingReferences: + - isArray: true + + - name: reject_unauthorized_role + http: + url: http://cubejs:4000/api/v1/dataschema/${{captures.dataschemaId}} + method: DELETE + headers: + Authorization: Bearer ${{captures.memberOnlyToken}} + check: + status: 403 + jsonpath: + $.code: "delete_blocked_authorization" + + - name: happy_path_delete + http: + url: http://cubejs:4000/api/v1/dataschema/${{captures.dataschemaId}} + method: DELETE + headers: + Authorization: Bearer ${{captures.accessToken}} + check: + status: 200 + jsonpath: + $.deleted: true + $.dataschemaId: ${{captures.dataschemaId}} diff --git a/tests/workflows/model-management/end-to-end.yml
b/tests/workflows/model-management/end-to-end.yml new file mode 100644 index 00000000..923ea7bc --- /dev/null +++ b/tests/workflows/model-management/end-to-end.yml @@ -0,0 +1,155 @@ +tests: + model_management_end_to_end: + steps: + # Setup: discover datasources + capture branchId + a minimally-typed cube name + - name: discover + http: + url: http://cubejs:4000/api/v1/discover + method: GET + headers: + Authorization: Bearer ${{captures.accessToken}} + captures: + datasourceId: + jsonpath: $.datasources[0].id + branchId: + jsonpath: $.datasources[0].branch_id + firstCubeName: + jsonpath: $.datasources[0].cubes[0].name + check: + status: 200 + + # SC-003 append happy path + - name: validate_append_happy + http: + url: http://cubejs:4000/api/v1/validate-in-branch + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + branchId: ${{captures.branchId}} + mode: append + draft: + fileName: "end_to_end_probe.yml" + content: | + cubes: + - name: end_to_end_probe + sql_table: public.end_to_end_probe + measures: + - name: count + type: count + dimensions: + - name: id + sql: id + type: number + primary_key: true + check: + status: 200 + jsonpath: + $.valid: true + + # SC-005 single-cube meta <= 90% of aggregate (asserted externally; here just shape) + - name: meta_single_cube + http: + url: http://cubejs:4000/api/v1/meta/cube/${{captures.firstCubeName}} + method: GET + headers: + Authorization: Bearer ${{captures.accessToken}} + x-hasura-datasource-id: ${{captures.datasourceId}} + check: + status: 200 + jsonpath: + $.cube.name: ${{captures.firstCubeName}} + + # SC-002 refresh returns evicted count + schemaVersion + - name: refresh_compiler + http: + url: http://cubejs:4000/api/v1/internal/refresh-compiler + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + branchId: ${{captures.branchId}} + check: + status: 200 + jsonpath: + $.evicted: + - 
isNumber: true + $.schemaVersion: + - isString: true + + # SC-002 second refresh idempotent + - name: refresh_again_idempotent + http: + url: http://cubejs:4000/api/v1/internal/refresh-compiler + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + branchId: ${{captures.branchId}} + check: + status: 200 + jsonpath: + $.evicted: 0 + + # SC-008: member-only caller rejected from every mutating endpoint. + - name: sc008_unauthorized_refresh + http: + url: http://cubejs:4000/api/v1/internal/refresh-compiler + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.memberOnlyToken}} + json: + branchId: ${{captures.branchId}} + check: + status: 403 + jsonpath: + $.code: "refresh_unauthorized" + + - name: sc008_unauthorized_delete + http: + url: http://cubejs:4000/api/v1/dataschema/${{captures.anyDataschemaId}} + method: DELETE + headers: + Authorization: Bearer ${{captures.memberOnlyToken}} + check: + status: 403 + jsonpath: + $.code: "delete_blocked_authorization" + + - name: sc008_unauthorized_rollback + http: + url: http://cubejs:4000/api/v1/version/rollback + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.memberOnlyToken}} + json: + branchId: ${{captures.branchId}} + toVersionId: ${{captures.priorVersionId}} + check: + status: 403 + jsonpath: + $.code: "rollback_blocked_authorization" + + - name: sc008_unauthorized_validate_replace + http: + url: http://cubejs:4000/api/v1/validate-in-branch + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.memberOnlyToken}} + json: + branchId: ${{captures.branchId}} + mode: replace + targetDataschemaId: ${{captures.anyDataschemaId}} + draft: + fileName: "end_to_end_probe.yml" + content: "cubes: []\n" + check: + status: 403 + jsonpath: + $.code: "delete_blocked_authorization" # NOTE(review): same code as the delete step — confirm validate replace-mode intentionally reuses delete_blocked_authorization (possible copy-paste) diff --git
a/tests/workflows/model-management/fixtures/circular-extends.yml b/tests/workflows/model-management/fixtures/circular-extends.yml new file mode 100644 index 00000000..90b58145 --- /dev/null +++ b/tests/workflows/model-management/fixtures/circular-extends.yml @@ -0,0 +1,26 @@ +name: circular-extends +mode: append +branchSeed: + - file: base.yml + code: | + cubes: + - name: base + extends: draft_cube + sql_table: public.base + measures: + - name: count + type: count +draft: + fileName: draft_cube.yml + content: | + cubes: + - name: draft_cube + extends: base + sql_table: public.derived + measures: + - name: count + type: count +expectedOutcome: + valid: false + errorCode: validate_unresolved_reference + referenceKind: extends diff --git a/tests/workflows/model-management/fixtures/dangling-join.yml b/tests/workflows/model-management/fixtures/dangling-join.yml new file mode 100644 index 00000000..df5ed7e3 --- /dev/null +++ b/tests/workflows/model-management/fixtures/dangling-join.yml @@ -0,0 +1,38 @@ +name: dangling-join +mode: append +branchSeed: + - file: orders.yml + code: | + cubes: + - name: orders + sql_table: public.orders + measures: + - name: count + type: count + dimensions: + - name: id + sql: id + type: number + primary_key: true +draft: + fileName: order_items.yml + content: | + cubes: + - name: order_items + sql_table: public.order_items + joins: + - name: nonexistent_cube + relationship: many_to_one + sql: "{CUBE}.nonexistent_cube_id = {nonexistent_cube}.id" + measures: + - name: count + type: count + dimensions: + - name: id + sql: id + type: number + primary_key: true +expectedOutcome: + valid: false + errorCode: validate_unresolved_reference + referenceKind: joins diff --git a/tests/workflows/model-management/fixtures/filter-params-orphan.yml b/tests/workflows/model-management/fixtures/filter-params-orphan.yml new file mode 100644 index 00000000..47b8d749 --- /dev/null +++ b/tests/workflows/model-management/fixtures/filter-params-orphan.yml @@ -0,0 
+1,38 @@ +name: filter-params-orphan +mode: preview-delete +branchSeed: + - file: orders.yml + code: | + cubes: + - name: orders + sql_table: public.orders + measures: + - name: count + type: count + dimensions: + - name: id + sql: id + type: number + primary_key: true + - name: created_at + sql: created_at + type: time + - file: order_items.yml + code: | + cubes: + - name: order_items + sql_table: public.order_items + measures: + - name: count + type: count + sql: "CASE WHEN FILTER_PARAMS.orders.created_at.filter('created_at') THEN 1 ELSE 0 END" + dimensions: + - name: id + sql: id + type: number + primary_key: true +targetCube: orders +expectedOutcome: + valid: false + errorCode: delete_blocked_by_references + referenceKind: filter_params diff --git a/tests/workflows/model-management/fixtures/measure-to-measure-typo.yml b/tests/workflows/model-management/fixtures/measure-to-measure-typo.yml new file mode 100644 index 00000000..8de27e0f --- /dev/null +++ b/tests/workflows/model-management/fixtures/measure-to-measure-typo.yml @@ -0,0 +1,36 @@ +name: measure-to-measure-typo +mode: append +branchSeed: + - file: orders.yml + code: | + cubes: + - name: orders + sql_table: public.orders + measures: + - name: totals + type: sum + sql: amount + dimensions: + - name: id + sql: id + type: number + primary_key: true +draft: + fileName: reports.yml + content: | + cubes: + - name: reports + sql_table: public.reports + measures: + - name: order_total + type: number + sql: "{orders.total}" + dimensions: + - name: id + sql: id + type: number + primary_key: true +expectedOutcome: + valid: false + errorCode: validate_unresolved_reference + referenceKind: formula diff --git a/tests/workflows/model-management/fixtures/preagg-reference-break.yml b/tests/workflows/model-management/fixtures/preagg-reference-break.yml new file mode 100644 index 00000000..cc82b678 --- /dev/null +++ b/tests/workflows/model-management/fixtures/preagg-reference-break.yml @@ -0,0 +1,45 @@ +name: 
preagg-reference-break +mode: preview-delete +branchSeed: + - file: orders.yml + code: | + cubes: + - name: orders + sql_table: public.orders + measures: + - name: count + type: count + - name: revenue + type: sum + sql: amount + dimensions: + - name: id + sql: id + type: number + primary_key: true + - name: created_at + sql: created_at + type: time + - file: order_rollup.yml + code: | + cubes: + - name: order_rollup + sql_table: public.orders + measures: + - name: count + type: count + dimensions: + - name: id + sql: id + type: number + primary_key: true + pre_aggregations: + - name: daily + measures: [orders.count, orders.revenue] + time_dimension: orders.created_at + granularity: day +targetCube: orders +expectedOutcome: + valid: false + errorCode: delete_blocked_by_references + referenceKind: pre_aggregation diff --git a/tests/workflows/model-management/fixtures/valid-append.yml b/tests/workflows/model-management/fixtures/valid-append.yml new file mode 100644 index 00000000..4f8d6cdf --- /dev/null +++ b/tests/workflows/model-management/fixtures/valid-append.yml @@ -0,0 +1,40 @@ +name: valid-append +mode: append +branchSeed: + - file: orders.yml + code: | + cubes: + - name: orders + sql_table: public.orders + measures: + - name: count + type: count + dimensions: + - name: id + sql: id + type: number + primary_key: true + - name: status + sql: status + type: string +draft: + fileName: customers.yml + content: | + cubes: + - name: customers + sql_table: public.customers + measures: + - name: count + type: count + dimensions: + - name: id + sql: id + type: number + primary_key: true + - name: email + sql: email + type: string +expectedOutcome: + valid: true + errorCode: null + referenceKind: null diff --git a/tests/workflows/model-management/is-current-invariant.yml b/tests/workflows/model-management/is-current-invariant.yml new file mode 100644 index 00000000..29167492 --- /dev/null +++ b/tests/workflows/model-management/is-current-invariant.yml @@ -0,0 +1,76 @@ 
+tests: + is_current_invariant: + steps: + # Inserts two versions on the same branch back-to-back and asserts the + # `is_current` invariant — exactly one row with is_current=true per branch. + - name: insert_version_one + http: + url: http://hasura:8080/v1/graphql + method: POST + headers: + Content-Type: application/json + x-hasura-admin-secret: ${{env.HASURA_GRAPHQL_ADMIN_SECRET}} + json: + query: | + mutation ($branch_id: uuid!, $user_id: uuid!) { + insert_versions_one(object: { + branch_id: $branch_id, + user_id: $user_id, + origin: "user" + }) { id is_current } + } + variables: + branch_id: ${{captures.branchId}} + user_id: ${{captures.userId}} + captures: + firstVersionId: + jsonpath: $.data.insert_versions_one.id + check: + status: 200 + + - name: insert_version_two + http: + url: http://hasura:8080/v1/graphql + method: POST + headers: + Content-Type: application/json + x-hasura-admin-secret: ${{env.HASURA_GRAPHQL_ADMIN_SECRET}} + json: + query: | + mutation ($branch_id: uuid!, $user_id: uuid!) { + insert_versions_one(object: { + branch_id: $branch_id, + user_id: $user_id, + origin: "user" + }) { id is_current } + } + variables: + branch_id: ${{captures.branchId}} + user_id: ${{captures.userId}} + captures: + secondVersionId: + jsonpath: $.data.insert_versions_one.id + check: + status: 200 + + - name: assert_exactly_one_is_current + http: + url: http://hasura:8080/v1/graphql + method: POST + headers: + Content-Type: application/json + x-hasura-admin-secret: ${{env.HASURA_GRAPHQL_ADMIN_SECRET}} + json: + query: | + query ($branch_id: uuid!) 
{ + versions_aggregate(where: { + branch_id: {_eq: $branch_id} + is_current: {_eq: true} + }) { aggregate { count } } + } + variables: + branch_id: ${{captures.branchId}} + check: + status: 200 + jsonpath: + $.data.versions_aggregate.aggregate.count: 1 diff --git a/tests/workflows/model-management/meta-single-cube.yml b/tests/workflows/model-management/meta-single-cube.yml new file mode 100644 index 00000000..7e84e413 --- /dev/null +++ b/tests/workflows/model-management/meta-single-cube.yml @@ -0,0 +1,51 @@ +tests: + meta_single_cube_flow: + steps: + - name: reject_missing_auth + http: + url: http://cubejs:4000/api/v1/meta/cube/orders + method: GET + check: + status: 403 + + - name: reject_missing_datasource_id + http: + url: http://cubejs:4000/api/v1/meta/cube/orders + method: GET + headers: + Authorization: Bearer ${{captures.accessToken}} + check: + status: 400 + + - name: not_found_for_missing_cube + http: + url: http://cubejs:4000/api/v1/meta/cube/this_cube_does_not_exist + method: GET + headers: + Authorization: Bearer ${{captures.accessToken}} + x-hasura-datasource-id: ${{captures.datasourceId}} + check: + status: 404 + jsonpath: + $.code: "cube_not_found" + + - name: happy_path_full_envelope + http: + url: http://cubejs:4000/api/v1/meta/cube/${{captures.cubeName}} + method: GET + headers: + Authorization: Bearer ${{captures.accessToken}} + x-hasura-datasource-id: ${{captures.datasourceId}} + check: + status: 200 + jsonpath: + $.cube.name: ${{captures.cubeName}} + $.cube.measures: + - isArray: true + $.cube.dimensions: + - isArray: true + $.datasourceId: ${{captures.datasourceId}} + $.branchId: + - isString: true + $.versionId: + - isString: true diff --git a/tests/workflows/model-management/refresh-compiler.yml b/tests/workflows/model-management/refresh-compiler.yml new file mode 100644 index 00000000..ad84c71f --- /dev/null +++ b/tests/workflows/model-management/refresh-compiler.yml @@ -0,0 +1,71 @@ +tests: + refresh_compiler_flow: + steps: + - name: 
reject_missing_auth + http: + url: http://cubejs:4000/api/v1/internal/refresh-compiler + method: POST + headers: + Content-Type: application/json + json: + branchId: "00000000-0000-4000-8000-000000000001" + check: + status: 403 + + - name: reject_missing_branch_id + http: + url: http://cubejs:4000/api/v1/internal/refresh-compiler + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: {} + check: + status: 400 + jsonpath: + $.code: "refresh_invalid_request" + + - name: reject_invisible_branch + http: + url: http://cubejs:4000/api/v1/internal/refresh-compiler + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + branchId: "ffffffff-ffff-4fff-8fff-ffffffffffff" + check: + status: 404 + jsonpath: + $.code: "refresh_branch_not_visible" + + - name: happy_path_returns_schema_version + http: + url: http://cubejs:4000/api/v1/internal/refresh-compiler + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + branchId: ${{captures.branchId}} + check: + status: 200 + jsonpath: + $.schemaVersion: + - isString: true + $.evicted: + - isNumber: true + + - name: second_call_is_idempotent + http: + url: http://cubejs:4000/api/v1/internal/refresh-compiler + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + branchId: ${{captures.branchId}} + check: + status: 200 + jsonpath: + $.evicted: 0 diff --git a/tests/workflows/model-management/validate-in-branch.yml b/tests/workflows/model-management/validate-in-branch.yml new file mode 100644 index 00000000..d75d1479 --- /dev/null +++ b/tests/workflows/model-management/validate-in-branch.yml @@ -0,0 +1,115 @@ +tests: + validate_in_branch_flow: + steps: + - name: reject_missing_auth + http: + url: http://cubejs:4000/api/v1/validate-in-branch + method: POST + headers: + Content-Type: 
application/json + json: + branchId: "00000000-0000-4000-8000-000000000001" + mode: append + draft: + fileName: "orders.yml" + content: "cubes: []\n" + check: + status: 403 + + - name: reject_invalid_mode + http: + url: http://cubejs:4000/api/v1/validate-in-branch + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + branchId: "00000000-0000-4000-8000-000000000001" + mode: "bogus" + check: + status: 400 + jsonpath: + $.code: "validate_invalid_mode" + + - name: reject_append_with_target + http: + url: http://cubejs:4000/api/v1/validate-in-branch + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + branchId: "00000000-0000-4000-8000-000000000001" + mode: append + targetDataschemaId: "00000000-0000-4000-8000-000000000002" + draft: + fileName: "orders.yml" + content: "cubes: []\n" + check: + status: 400 + jsonpath: + $.code: "validate_invalid_mode" + + - name: reject_replace_without_draft + http: + url: http://cubejs:4000/api/v1/validate-in-branch + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + branchId: "00000000-0000-4000-8000-000000000001" + mode: replace + targetDataschemaId: "00000000-0000-4000-8000-000000000002" + check: + status: 400 + jsonpath: + $.code: "validate_invalid_mode" + + - name: reject_preview_delete_with_draft + http: + url: http://cubejs:4000/api/v1/validate-in-branch + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + branchId: "00000000-0000-4000-8000-000000000001" + mode: preview-delete + targetDataschemaId: "00000000-0000-4000-8000-000000000002" + draft: + fileName: "orders.yml" + content: "cubes: []\n" + check: + status: 400 + jsonpath: + $.code: "validate_invalid_mode" + + - name: append_happy_path + http: + url: http://cubejs:4000/api/v1/validate-in-branch + method: POST + 
headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + branchId: ${{captures.branchId}} + mode: append + draft: + fileName: "customers.yml" + content: | + cubes: + - name: customers + sql_table: public.customers + measures: + - name: count + type: count + dimensions: + - name: id + sql: id + type: number + primary_key: true + check: + status: 200 + jsonpath: + $.valid: true diff --git a/tests/workflows/model-management/version-diff.yml b/tests/workflows/model-management/version-diff.yml new file mode 100644 index 00000000..4b53478a --- /dev/null +++ b/tests/workflows/model-management/version-diff.yml @@ -0,0 +1,64 @@ +tests: + version_diff_flow: + steps: + - name: reject_missing_auth + http: + url: http://cubejs:4000/api/v1/version/diff + method: POST + headers: + Content-Type: application/json + json: + fromVersionId: "00000000-0000-4000-8000-000000000001" + toVersionId: "00000000-0000-4000-8000-000000000002" + check: + status: 403 + + - name: reject_invalid_request + http: + url: http://cubejs:4000/api/v1/version/diff + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: {} + check: + status: 400 + jsonpath: + $.code: "diff_invalid_request" + + - name: reject_cross_branch + http: + url: http://cubejs:4000/api/v1/version/diff + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + fromVersionId: ${{captures.versionOnBranchA}} + toVersionId: ${{captures.versionOnBranchB}} + check: + status: 400 + jsonpath: + $.code: "diff_cross_branch" + + - name: happy_path + http: + url: http://cubejs:4000/api/v1/version/diff + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + fromVersionId: ${{captures.fromVersionId}} + toVersionId: ${{captures.toVersionId}} + check: + status: 200 + jsonpath: + $.addedCubes: + - isArray: true + $.removedCubes: 
+ - isArray: true + $.modifiedCubes: + - isArray: true + $.branchId: + - isString: true diff --git a/tests/workflows/model-management/version-rollback.yml b/tests/workflows/model-management/version-rollback.yml new file mode 100644 index 00000000..41be1fc3 --- /dev/null +++ b/tests/workflows/model-management/version-rollback.yml @@ -0,0 +1,75 @@ +tests: + version_rollback_flow: + steps: + - name: reject_missing_auth + http: + url: http://cubejs:4000/api/v1/version/rollback + method: POST + headers: + Content-Type: application/json + json: + branchId: "00000000-0000-4000-8000-000000000001" + toVersionId: "00000000-0000-4000-8000-000000000002" + check: + status: 403 + + - name: reject_missing_body + http: + url: http://cubejs:4000/api/v1/version/rollback + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: {} + check: + status: 400 + jsonpath: + $.code: "rollback_invalid_request" + + - name: reject_version_not_on_branch + http: + url: http://cubejs:4000/api/v1/version/rollback + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + branchId: ${{captures.branchId}} + toVersionId: ${{captures.otherBranchVersionId}} + check: + status: 400 + jsonpath: + $.code: "rollback_version_not_on_branch" + + - name: reject_unauthorized + http: + url: http://cubejs:4000/api/v1/version/rollback + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.memberOnlyToken}} + json: + branchId: ${{captures.branchId}} + toVersionId: ${{captures.priorVersionId}} + check: + status: 403 + jsonpath: + $.code: "rollback_blocked_authorization" + + - name: happy_path + http: + url: http://cubejs:4000/api/v1/version/rollback + method: POST + headers: + Content-Type: application/json + Authorization: Bearer ${{captures.accessToken}} + json: + branchId: ${{captures.branchId}} + toVersionId: ${{captures.priorVersionId}} + check: + status: 
200 + jsonpath: + $.newVersionId: + - isString: true + $.clonedDataschemaCount: + - isNumber: true