Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion services/cubejs/src/routes/smartGenerate.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import { deserializeProfile } from '../utils/smart-generation/profileSerializer.
import { diffModels, parseCubesFromJs } from '../utils/smart-generation/diffModels.js';
import { loadRules } from '../utils/queryRewrite.js';
import { validateModelSyntax, smokeTestQuery } from '../utils/smart-generation/modelValidator.js';
import { fetchClickHouseAliasColumnNames } from '../utils/smart-generation/clickHouseAliasColumns.js';

function reorderProfileColumns(profiledTable) {
if (!profiledTable?.columns || !(profiledTable.columns instanceof Map)) return profiledTable;
Expand Down Expand Up @@ -216,7 +217,12 @@ export default async (req, res, cubejs) => {
profiledTable = { ...profiledTable, columns: filtered, columnOrder: filteredOrder };
}

// Build cubes
// Build cubes — include ClickHouse ALIAS columns explicitly in cube sql (after SELECT *)
let aliasColumnNames = [];
if (driver) {
aliasColumnNames = await fetchClickHouseAliasColumnNames(driver, schema, table);
}

emitter.emit('building', 'Building cube definitions...', 0.6);
const cubeResult = buildCubes(profiledTable, {
partition,
Expand All @@ -227,6 +233,7 @@ export default async (req, res, cubejs) => {
cubeName: cubeNameOverride,
filters,
nestedFilters,
aliasColumnNames,
});

// Store filters in cube-level meta for provenance tracking
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { describe, it } from 'node:test';
import assert from 'node:assert';
import { fetchClickHouseAliasColumnNames } from '../clickHouseAliasColumns.js';

describe('fetchClickHouseAliasColumnNames', () => {
  // Minimal driver double: the function under test only ever calls `.query(sql)`.
  const driverWith = (query) => ({ query });

  it('returns distinct names from driver rows (ALIAS-only query)', async () => {
    const stub = driverWith(async (statement) => {
      // The lookup must be restricted to ALIAS columns; show the SQL if it is not.
      assert.ok(statement.includes("default_kind = 'ALIAS'"), statement);
      return [{ name: 'duration_ratio' }, { name: 'other_alias' }];
    });

    const result = await fetchClickHouseAliasColumnNames(stub, 'dev', 'semantic_events');

    assert.deepStrictEqual(result, ['duration_ratio', 'other_alias']);
  });

  it('reads name from alternate row keys', async () => {
    // Rows keyed by `Name` (capitalised) must be understood as well.
    const stub = driverWith(async () => [{ Name: 'col_a' }]);

    const result = await fetchClickHouseAliasColumnNames(stub, 'd', 't');

    assert.deepStrictEqual(result, ['col_a']);
  });

  it('returns [] when query throws', async () => {
    // A failing lookup is expected to resolve to an empty list, not reject.
    const stub = driverWith(async () => {
      throw new Error('no system.columns');
    });

    const result = await fetchClickHouseAliasColumnNames(stub, 'd', 't');

    assert.deepStrictEqual(result, []);
  });
});
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,23 @@ describe('cubeBuilder – buildCubes', () => {
assert.strictEqual(cubes[0].sql_table, 'test_db.events');
});

it('should list ClickHouse ALIAS columns after SELECT * when aliasColumnNames is provided', () => {
  // Alias names are the only option given: the cube is expected to carry an
  // explicit `sql` and no `sql_table`.
  const built = buildCubes(table, { aliasColumnNames: ['duration_ratio'] });
  const generatedCube = built.cubes[0];

  assert.strictEqual(generatedCube.sql, 'SELECT *, duration_ratio FROM test_db.events');
  assert.strictEqual(generatedCube.sql_table, undefined);
});

it('should append ALIAS columns to filtered cube sql', () => {
  // Combine alias columns with a single equality filter on `id`.
  const buildOptions = {
    aliasColumnNames: ['duration_ratio'],
    filters: [{ column: 'id', operator: '=', value: 'x' }],
  };
  const built = buildCubes(table, buildOptions);

  // The alias list goes between `SELECT *` and `FROM`; the filter follows as WHERE.
  assert.strictEqual(
    built.cubes[0].sql,
    "SELECT *, duration_ratio FROM test_db.events WHERE id = 'x'"
  );
});

it('should produce dimensions for string and date columns', () => {
const { cubes } = buildCubes(table);
const dimNames = cubes[0].dimensions.map((d) => d.name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,4 +243,18 @@ describe('yamlGenerator – generateJs advanced properties', () => {
const js = generateJs([cube]);
assert.ok(js.includes('${total_amount}'), 'should convert {measure} to ${measure}');
});

it('emits cube-level sql with JSON string so ClickHouse backticks are not escaped (no \\\\`)', () => {
  // Cube source SQL containing a backtick-quoted (dotted) ClickHouse identifier.
  const sourceSql = 'SELECT *, `commerce.total`, duration_ratio FROM dev.semantic_events';
  const cubeDefinition = {
    name: 'semantic_events',
    sql: sourceSql,
    meta: { auto_generated: true },
    dimensions: [],
    measures: [],
  };

  const generated = generateJs([cubeDefinition]);

  // The SQL must be emitted as a double-quoted string with the backticks intact...
  assert.ok(
    generated.includes('sql: "SELECT *, `commerce.total`, duration_ratio FROM dev.semantic_events"'),
    generated
  );
  // ...and never with a backslash in front of the inner backticks.
  assert.ok(!generated.includes('\\`commerce'), 'should not escape inner backticks with backslash');
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/**
 * Extract the column name from a single result row.
 *
 * The keys `name`, `Name` and `column_name` are tried in that order.
 *
 * @param {*} row - One row as returned by the driver.
 * @returns {*} The first non-nullish value found under those keys, or null
 *   when `row` is not an object or none of the keys is set.
 */
function rowColumnName(row) {
  if (!row || typeof row !== 'object') return null;
  return row.name ?? row.Name ?? row.column_name ?? null;
}

/**
 * Escape a value for use inside a single-quoted ClickHouse string literal.
 *
 * ClickHouse treats the backslash as an escape character inside string
 * literals, so doubling single quotes alone is not enough: an input such as
 * `\'` would otherwise end the literal early. Backslashes are escaped first so
 * the backslash added in front of each quote is not escaped a second time.
 *
 * @param {*} value - Value to embed; coerced with String().
 * @returns {string} The escaped text, without surrounding quotes.
 */
function escapeChStringLiteral(value) {
  return String(value).replace(/\\/g, '\\\\').replace(/'/g, "\\'");
}

/**
 * Load ALIAS column names from ClickHouse system.columns so generated cube
 * `sql` can list them explicitly after SELECT *.
 *
 * Only `default_kind = 'ALIAS'` columns are included (not MATERIALIZED/DEFAULT).
 * The lookup is best-effort: any failure is logged with console.warn and an
 * empty list is returned instead of rejecting.
 *
 * @param {object} driver - ClickHouse driver with .query(sql)
 * @param {string} database
 * @param {string} table
 * @returns {Promise<string[]>} Distinct, non-empty column names in the order
 *   the driver returned them; [] when an argument is missing or the query fails.
 */
export async function fetchClickHouseAliasColumnNames(driver, database, table) {
  if (!driver?.query || !database || !table) return [];
  const db = escapeChStringLiteral(database);
  const tbl = escapeChStringLiteral(table);
  try {
    const rows = await driver.query(
      `SELECT name FROM system.columns `
        + `WHERE database = '${db}' AND table = '${tbl}' `
        + `AND default_kind = 'ALIAS' `
        + `ORDER BY position`
    );
    const list = Array.isArray(rows) ? rows : [];
    const names = list
      .map((r) => rowColumnName(r))
      .filter((n) => typeof n === 'string' && n.length > 0);
    // A Set keeps first-seen order while dropping duplicates.
    return [...new Set(names)];
  } catch (err) {
    // The rejection value is not guaranteed to be an Error (it may even be
    // null), so read `.message` defensively to keep this path non-fatal.
    const reason = err?.message ?? String(err);
    console.warn(`[smartGenerate] ALIAS column lookup failed (non-fatal): ${reason}`);
    return [];
  }
}
53 changes: 48 additions & 5 deletions services/cubejs/src/utils/smart-generation/cubeBuilder.js
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,28 @@ function filtersToSqlConditions(filters) {
return conditions.join(' AND ');
}

/**
 * Quote a column identifier for use in generated SELECT lists.
 *
 * Names made only of ASCII letters, digits and underscores (not starting with
 * a digit) are returned unquoted. Any other name has its backticks removed and
 * is wrapped in backticks.
 *
 * @param {*} name - Candidate column name.
 * @returns {string|null} The identifier ready for a SELECT list, or null when
 *   `name` is not a non-empty string or nothing remains once backticks are
 *   removed (so callers can drop it instead of emitting an empty identifier).
 */
function quoteChIdentForSelectList(name) {
  if (!name || typeof name !== 'string') return null;
  if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) return name;
  const stripped = name.replace(/`/g, '');
  // A name consisting solely of backticks would otherwise become an empty
  // quoted identifier, which is not a usable column reference.
  if (stripped.length === 0) return null;
  return `\`${stripped}\``;
}

/**
 * Build a `SELECT * FROM <table>` statement, listing the given ClickHouse
 * ALIAS columns explicitly right after the star.
 *
 * Each name goes through quoteChIdentForSelectList; names it rejects are left
 * out. With no usable names the result is a plain `SELECT * FROM <table>`.
 *
 * @param {string} qualifiedTable - e.g. db.table
 * @param {string[]} [aliasColumnNames]
 * @returns {string}
 */
function formatSelectStarWithAliasColumns(qualifiedTable, aliasColumnNames) {
  // Start from the star and append every alias that yields a usable identifier.
  const projection = (aliasColumnNames || []).reduce((columns, candidate) => {
    const quoted = quoteChIdentForSelectList(candidate);
    if (quoted) columns.push(quoted);
    return columns;
  }, ['*']);
  return `SELECT ${projection.join(', ')} FROM ${qualifiedTable}`;
}

/**
* Build the SQL expression for the cube source.
*
Expand All @@ -171,9 +193,10 @@ function filtersToSqlConditions(filters) {
* @param {string|null} partition - Partition value
* @param {boolean} isInternal - Whether the table is in internalTables
* @param {Array<{ column: string, operator: string, value: * }>} [filters]
* @param {string[]} [aliasColumnNames] - ClickHouse ALIAS columns to list after SELECT *
* @returns {{ sql_table?: string, sql?: string }}
*/
function buildCubeSource(schema, table, partition, isInternal, filters) {
function buildCubeSource(schema, table, partition, isInternal, filters, aliasColumnNames = []) {
const qualifiedTable = schema ? `${schema}.${table}` : table;
const conditions = [];

Expand All @@ -187,7 +210,12 @@ function buildCubeSource(schema, table, partition, isInternal, filters) {

if (conditions.length > 0) {
return {
sql: `SELECT * FROM ${qualifiedTable} WHERE ${conditions.join(' AND ')}`,
sql: `${formatSelectStarWithAliasColumns(qualifiedTable, aliasColumnNames)} WHERE ${conditions.join(' AND ')}`,
};
}
if (aliasColumnNames && aliasColumnNames.length > 0) {
return {
sql: formatSelectStarWithAliasColumns(qualifiedTable, aliasColumnNames),
};
}
return { sql_table: qualifiedTable };
Expand Down Expand Up @@ -673,6 +701,7 @@ function buildRawCube(profiledTable, options) {
primaryKeys = [],
cubeName: cubeNameOverride,
filters = [],
aliasColumnNames = [],
} = options;
const arrayJoinGroups = nestedFilters.map((nf) => nf.group);

Expand All @@ -681,7 +710,7 @@ function buildRawCube(profiledTable, options) {
const cubeName = cubeNameOverride || sanitizeCubeName(table);
const isInternal = internalTables.includes(table);

const source = buildCubeSource(schema, table, partition, isInternal, filters);
const source = buildCubeSource(schema, table, partition, isInternal, filters, aliasColumnNames);

const { dimensions, measures, mapKeysDiscovered, columnsProfiled, columnsSkipped } =
processColumns(profiledTable.columns, {
Expand Down Expand Up @@ -851,6 +880,7 @@ function buildArrayJoinCube(profiledTable, arrayJoinGroups, rawCube, options) {
partition = null,
internalTables = [],
nestedFilters = [],
aliasColumnNames = [],
} = options;

const schema = profiledTable.database;
Expand Down Expand Up @@ -925,17 +955,25 @@ function buildArrayJoinCube(profiledTable, arrayJoinGroups, rawCube, options) {
// The ARRAY JOIN aliases must also be in the SELECT so they're visible
// when Cube.js wraps this in a subquery.
const selectParts = [];
const basePhysicalNames = new Set();
for (const [colName, colData] of profiledTable.columns) {
// Skip ALL nested/grouped columns — they're Array types and can't be
// used directly in SQL. ARRAY JOIN group children are added below as
// scalar alias names. Other groups (e.g. location.*) are excluded since
// they don't have ARRAY JOIN expansion in this cube.
if (colData.columnType === ColumnType.GROUPED) continue;
if (colData.columnType === ColumnType.NESTED) continue;
basePhysicalNames.add(colName);
// Only backtick-quote names with dots or special chars; simple names stay unquoted
const needsQuote = /[^a-zA-Z0-9_]/.test(colName);
selectParts.push(needsQuote ? ` \`${colName}\`` : ` ${colName}`);
}
// ALIAS columns may be omitted from the profiler map but must appear in SELECT
for (const aliasName of aliasColumnNames) {
if (basePhysicalNames.has(aliasName)) continue;
const needsQuote = /[^a-zA-Z0-9_]/.test(aliasName);
selectParts.push(needsQuote ? ` \`${aliasName}\`` : ` ${aliasName}`);
}
// Add ARRAY JOIN alias names (scalar after JOIN) so they project into
// Cube.js subquery scope. Use the alias name only (not "x AS y" again).
for (const [, cols] of groupColumns) {
Expand All @@ -950,7 +988,7 @@ function buildArrayJoinCube(profiledTable, arrayJoinGroups, rawCube, options) {
}
sql = `SELECT\n${selectParts.join(',\n')}\nFROM ${schema}.${table}\nLEFT ARRAY JOIN\n${ajParts.join(',\n')}`;
} else {
sql = `SELECT * FROM ${schema}.${table}`;
sql = formatSelectStarWithAliasColumns(`${schema}.${table}`, aliasColumnNames);
}

// Collect WHERE conditions — use aliased names (dots → underscores)
Expand Down Expand Up @@ -1105,6 +1143,7 @@ function buildArrayJoinCube(profiledTable, arrayJoinGroups, rawCube, options) {
* @param {Array<{column: string, alias: string}>} [options.arrayJoinColumns] - Columns for ARRAY JOIN
* @param {number} [options.maxMapKeys] - Max Map keys per column (default 500)
* @param {string[]} [options.primaryKeys] - Primary key column names
* @param {string[]} [options.aliasColumnNames] - ClickHouse ALIAS columns to append after SELECT *
* @returns {{
* cubes: object[],
* summary: {
Expand Down Expand Up @@ -1232,7 +1271,11 @@ export function buildCubes(profiledTable, options = {}) {
legacyCube.name = sanitizeCubeName(`${profiledTable.table}_${ajDef.alias}`);
const qualifiedTable = `${profiledTable.database}.${profiledTable.table}`;
const isInternal = (options.internalTables || []).includes(profiledTable.table);
let legacySql = `SELECT *, ${ajDef.column} AS ${ajDef.alias} FROM ${qualifiedTable} LEFT ARRAY JOIN ${ajDef.column} AS ${ajDef.alias}`;
const aliasExtra = (options.aliasColumnNames || [])
.map((n) => quoteChIdentForSelectList(n))
.filter(Boolean);
const aliasPrefix = aliasExtra.length > 0 ? `, ${aliasExtra.join(', ')}` : '';
let legacySql = `SELECT *${aliasPrefix}, ${ajDef.column} AS ${ajDef.alias} FROM ${qualifiedTable} LEFT ARRAY JOIN ${ajDef.column} AS ${ajDef.alias}`;
if (isInternal && options.partition) {
legacySql += ` WHERE partition = '${options.partition}'`;
}
Expand Down
60 changes: 54 additions & 6 deletions services/cubejs/src/utils/smart-generation/yamlGenerator.js
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,54 @@ function sqlToJsTemplate(sql) {
return js;
}

/**
 * Rewrite brace references such as `{CUBE}` or `{measure}` into JS template
 * interpolations (`${CUBE}`, `${measure}`). Braces already preceded by `$` are
 * left alone, and no other escaping is applied to the text.
 *
 * @param {string} sql
 * @returns {string}
 */
function cubeSqlRefsToJsInterpolation(sql) {
  const braceRef = /(?<!\$)\{([^}]+)\}/g;
  return sql.replace(braceRef, (_whole, ref) => `\${${ref}}`);
}

/**
 * Decide whether cube base `sql` / `sql_table` has to be emitted as a template
 * literal. That is only the case when the SQL embeds `{CUBE}` or
 * `{FILTER_PARAMS…}`; everything else can go through JSON.stringify, which
 * leaves ClickHouse `` `col` `` backticks unescaped in the generated file.
 *
 * @param {string} sql
 * @returns {boolean} false for empty or non-string input
 */
function cubeBaseSqlNeedsTemplateLiteral(sql) {
  if (typeof sql !== 'string' || sql.length === 0) return false;
  const templateMarkers = [/\{CUBE\}/, /\{FILTER_PARAMS/];
  return templateMarkers.some((marker) => marker.test(sql));
}

/**
 * Produce the JS source for a cube-level `sql` or `sql_table` value in a
 * generated model: a backtick template literal when
 * cubeBaseSqlNeedsTemplateLiteral says one is required, otherwise a
 * JSON-encoded string.
 *
 * @param {string} sql
 * @returns {string} JS expression source
 */
function emitCubeBaseSqlForJsModel(sql) {
  return cubeBaseSqlNeedsTemplateLiteral(sql)
    ? `\`${sqlToJsTemplate(sql)}\``
    : JSON.stringify(sql);
}

/**
 * Produce the JS source for dimension / measure / segment `sql` in a generated
 * model. The SQL is first run through Cube ref conversion; if that leaves any
 * `${…}` in the text, a template literal is emitted (built from the original
 * SQL via sqlToJsTemplate), otherwise the text is emitted via JSON.stringify.
 *
 * @param {string} sql
 * @returns {string} JS expression source (e.g. `"SELECT …"` or `` `…${CUBE}…` ``)
 */
function emitSqlForJsModel(sql) {
  const interpolated = cubeSqlRefsToJsInterpolation(sql);
  const hasInterpolation = /\$\{/.test(interpolated);
  return hasInterpolation
    ? `\`${sqlToJsTemplate(sql)}\``
    : JSON.stringify(interpolated);
}

/**
* Serialize a meta object as a JS object literal string.
* @param {object} meta
Expand Down Expand Up @@ -211,11 +259,11 @@ export function generateJs(cubeDefinitions) {

lines.push(`cube(\`${formatted.name}\`, {`);

// Source
// Source — cube-level SQL: prefer JSON.stringify so CH `` `col` `` is not escaped as \\`
if (formatted.sql_table) {
lines.push(` sql_table: \`${escapeTemplateLiteral(formatted.sql_table)}\`,`);
lines.push(` sql_table: ${emitCubeBaseSqlForJsModel(formatted.sql_table)},`);
} else if (formatted.sql) {
lines.push(` sql: \`${sqlToJsTemplate(formatted.sql)}\`,`);
lines.push(` sql: ${emitCubeBaseSqlForJsModel(formatted.sql)},`);
}

// Cube-level scalar properties
Expand All @@ -238,7 +286,7 @@ export function generateJs(cubeDefinitions) {
lines.push(' dimensions: {');
for (const dim of formatted.dimensions) {
lines.push(` ${dim.name}: {`);
lines.push(` sql: \`${sqlToJsTemplate(dim.sql)}\`,`);
lines.push(` sql: ${emitSqlForJsModel(dim.sql)},`);
lines.push(` type: \`${dim.type}\`,`);
if (dim.description) {
lines.push(` description: ${JSON.stringify(dim.description)},`);
Expand Down Expand Up @@ -276,7 +324,7 @@ export function generateJs(cubeDefinitions) {
lines.push(' measures: {');
for (const m of formatted.measures) {
lines.push(` ${m.name}: {`);
lines.push(` sql: \`${sqlToJsTemplate(m.sql)}\`,`);
lines.push(` sql: ${emitSqlForJsModel(m.sql)},`);
lines.push(` type: \`${m.type}\`,`);
if (m.description) {
lines.push(` description: ${JSON.stringify(m.description)},`);
Expand Down Expand Up @@ -326,7 +374,7 @@ export function generateJs(cubeDefinitions) {
lines.push(' segments: {');
for (const seg of formatted.segments) {
lines.push(` ${seg.name}: {`);
lines.push(` sql: \`${sqlToJsTemplate(seg.sql)}\`,`);
lines.push(` sql: ${emitSqlForJsModel(seg.sql)},`);
if (seg.title) {
lines.push(` title: ${JSON.stringify(seg.title)},`);
}
Expand Down
Loading