Skip to content

Commit 00d7626

Browse files
feat(cubejs): add support for ClickHouse ALIAS columns in cube SQL generation
- Introduced `fetchClickHouseAliasColumnNames` utility to retrieve ALIAS column names from ClickHouse.
- Updated `buildCubeSource` and `buildRawCube` functions to include ALIAS columns in the generated SQL after `SELECT *`.
- Enhanced `yamlGenerator` to emit cube-level SQL with proper handling of backticks for ClickHouse compatibility.
- Added tests for the new functionality, ensuring correct retrieval and inclusion of ALIAS columns in cube definitions.
1 parent 59e8b6b commit 00d7626

7 files changed

Lines changed: 212 additions & 12 deletions

File tree

services/cubejs/src/routes/smartGenerate.js

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import { deserializeProfile } from '../utils/smart-generation/profileSerializer.
1515
import { diffModels, parseCubesFromJs } from '../utils/smart-generation/diffModels.js';
1616
import { loadRules } from '../utils/queryRewrite.js';
1717
import { validateModelSyntax, smokeTestQuery } from '../utils/smart-generation/modelValidator.js';
18+
import { fetchClickHouseAliasColumnNames } from '../utils/smart-generation/clickHouseAliasColumns.js';
1819

1920
function reorderProfileColumns(profiledTable) {
2021
if (!profiledTable?.columns || !(profiledTable.columns instanceof Map)) return profiledTable;
@@ -216,7 +217,12 @@ export default async (req, res, cubejs) => {
216217
profiledTable = { ...profiledTable, columns: filtered, columnOrder: filteredOrder };
217218
}
218219

219-
// Build cubes
220+
// Build cubes — include ClickHouse ALIAS columns explicitly in cube sql (after SELECT *)
221+
let aliasColumnNames = [];
222+
if (driver) {
223+
aliasColumnNames = await fetchClickHouseAliasColumnNames(driver, schema, table);
224+
}
225+
220226
emitter.emit('building', 'Building cube definitions...', 0.6);
221227
const cubeResult = buildCubes(profiledTable, {
222228
partition,
@@ -227,6 +233,7 @@ export default async (req, res, cubejs) => {
227233
cubeName: cubeNameOverride,
228234
filters,
229235
nestedFilters,
236+
aliasColumnNames,
230237
});
231238

232239
// Store filters in cube-level meta for provenance tracking
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import { describe, it } from 'node:test';
2+
import assert from 'node:assert';
3+
import { fetchClickHouseAliasColumnNames } from '../clickHouseAliasColumns.js';
4+
5+
// Unit tests for fetchClickHouseAliasColumnNames, driven by stub drivers that
// only implement `query`.
describe('fetchClickHouseAliasColumnNames', () => {
6+
// Happy path: the stub checks that the generated SQL filters on
// default_kind = 'ALIAS' and returns rows keyed by `name`.
it('returns distinct names from driver rows (ALIAS-only query)', async () => {
7+
const driver = {
8+
query: async (sql) => {
9+
assert.ok(sql.includes("default_kind = 'ALIAS'"), sql);
10+
return [{ name: 'duration_ratio' }, { name: 'other_alias' }];
11+
},
12+
};
13+
const names = await fetchClickHouseAliasColumnNames(driver, 'dev', 'semantic_events');
14+
assert.deepStrictEqual(names, ['duration_ratio', 'other_alias']);
15+
});
16+
17+
// The row here uses the capitalised `Name` key instead of `name`.
it('reads name from alternate row keys', async () => {
18+
const driver = {
19+
query: async () => [{ Name: 'col_a' }],
20+
};
21+
const names = await fetchClickHouseAliasColumnNames(driver, 'd', 't');
22+
assert.deepStrictEqual(names, ['col_a']);
23+
});
24+
25+
// A rejecting driver is expected to yield an empty list, not a rejection.
it('returns [] when query throws', async () => {
26+
const driver = {
27+
query: async () => {
28+
throw new Error('no system.columns');
29+
},
30+
};
31+
const names = await fetchClickHouseAliasColumnNames(driver, 'd', 't');
32+
assert.deepStrictEqual(names, []);
33+
});
34+
});

services/cubejs/src/utils/smart-generation/__tests__/cubeBuilder.test.js

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,23 @@ describe('cubeBuilder – buildCubes', () => {
5555
assert.strictEqual(cubes[0].sql_table, 'test_db.events');
5656
});
5757

58+
it('should list ClickHouse ALIAS columns after SELECT * when aliasColumnNames is provided', () => {
59+
const { cubes } = buildCubes(table, { aliasColumnNames: ['duration_ratio'] });
60+
assert.strictEqual(cubes[0].sql, 'SELECT *, duration_ratio FROM test_db.events');
61+
assert.strictEqual(cubes[0].sql_table, undefined);
62+
});
63+
64+
it('should append ALIAS columns to filtered cube sql', () => {
65+
const { cubes } = buildCubes(table, {
66+
aliasColumnNames: ['duration_ratio'],
67+
filters: [{ column: 'id', operator: '=', value: 'x' }],
68+
});
69+
assert.strictEqual(
70+
cubes[0].sql,
71+
"SELECT *, duration_ratio FROM test_db.events WHERE id = 'x'"
72+
);
73+
});
74+
5875
it('should produce dimensions for string and date columns', () => {
5976
const { cubes } = buildCubes(table);
6077
const dimNames = cubes[0].dimensions.map((d) => d.name);

services/cubejs/src/utils/smart-generation/__tests__/yamlGenerator.test.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,4 +243,18 @@ describe('yamlGenerator – generateJs advanced properties', () => {
243243
const js = generateJs([cube]);
244244
assert.ok(js.includes('${total_amount}'), 'should convert {measure} to ${measure}');
245245
});
246+
247+
it('emits cube-level sql with JSON string so ClickHouse backticks are not escaped (no \\\\`)', () => {
248+
const sql = 'SELECT *, `commerce.total`, duration_ratio FROM dev.semantic_events';
249+
const cube = {
250+
name: 'semantic_events',
251+
sql,
252+
meta: { auto_generated: true },
253+
dimensions: [],
254+
measures: [],
255+
};
256+
const js = generateJs([cube]);
257+
assert.ok(js.includes('sql: "SELECT *, `commerce.total`, duration_ratio FROM dev.semantic_events"'), js);
258+
assert.ok(!js.includes('\\`commerce'), 'should not escape inner backticks with backslash');
259+
});
246260
});
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/**
 * Pull the column name out of a single driver result row.
 *
 * The first non-nullish value among `name`, `Name` and `column_name` wins,
 * so rows with differently-cased keys are still understood.
 *
 * @param {*} row - One row as returned by `driver.query`
 * @returns {*} The value found under one of the accepted keys, or null
 */
function rowColumnName(row) {
  if (!row || typeof row !== 'object') return null;
  return row.name ?? row.Name ?? row.column_name ?? null;
}

/**
 * Escape a value for embedding inside a single-quoted ClickHouse string literal.
 *
 * Backslashes are doubled first: ClickHouse honours backslash escapes inside
 * string literals, so a trailing backslash would otherwise swallow the closing
 * quote and let the rest of the value escape the literal.
 *
 * @param {*} value
 * @returns {string}
 */
function escapeChStringLiteral(value) {
  return String(value).replace(/\\/g, '\\\\').replace(/'/g, "''");
}

/**
 * Load ALIAS column names from ClickHouse system.columns so generated cube
 * `sql` can list them explicitly after SELECT *.
 *
 * Only `default_kind = 'ALIAS'` columns are included (not MATERIALIZED/DEFAULT).
 * The lookup is best-effort: any failure is logged and yields an empty list.
 *
 * @param {object} driver - ClickHouse driver with .query(sql)
 * @param {string} database
 * @param {string} table
 * @returns {Promise<string[]>} Distinct, non-empty column names in query order
 */
export async function fetchClickHouseAliasColumnNames(driver, database, table) {
  if (!driver?.query || !database || !table) return [];
  const db = escapeChStringLiteral(database);
  const tbl = escapeChStringLiteral(table);
  try {
    const rows = await driver.query(
      `SELECT name FROM system.columns `
        + `WHERE database = '${db}' AND table = '${tbl}' `
        + `AND default_kind = 'ALIAS' `
        + `ORDER BY position`
    );
    const list = Array.isArray(rows) ? rows : [];
    const names = list
      .map((r) => rowColumnName(r))
      .filter((n) => typeof n === 'string' && n.length > 0);
    // Set keeps first-seen order, so the ORDER BY position ordering survives.
    return [...new Set(names)];
  } catch (err) {
    // A driver may reject with a non-Error (even null); never throw from here.
    const reason = err?.message ?? err;
    console.warn(`[smartGenerate] ALIAS column lookup failed (non-fatal): ${reason}`);
    return [];
  }
}

services/cubejs/src/utils/smart-generation/cubeBuilder.js

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,28 @@ function filtersToSqlConditions(filters) {
158158
return conditions.join(' AND ');
159159
}
160160

161+
/**
 * Quote a column identifier for use in generated SELECT lists.
 *
 * Simple names (letters, digits, underscore, not starting with a digit) are
 * returned unquoted. Anything else is wrapped in backticks, with embedded
 * backslashes and backticks backslash-escaped so the quoted identifier still
 * refers to the original column name instead of a silently altered one.
 *
 * @param {*} name - Column name; non-strings and empty strings are rejected
 * @returns {string|null} The identifier ready for a SELECT list, or null
 */
function quoteChIdentForSelectList(name) {
  if (!name || typeof name !== 'string') return null;
  if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) return name;
  // `$&` re-inserts the matched character after the escaping backslash.
  const escaped = name.replace(/[\\`]/g, '\\$&');
  return `\`${escaped}\``;
}

/**
 * `SELECT *` plus explicit ALIAS column names (comma-separated) for ClickHouse.
 *
 * Entries that are not non-empty strings are dropped; with no usable entries
 * the result is a plain `SELECT * FROM <table>`.
 *
 * @param {string} qualifiedTable - e.g. db.table
 * @param {string[]} [aliasColumnNames]
 * @returns {string}
 */
function formatSelectStarWithAliasColumns(qualifiedTable, aliasColumnNames) {
  const extras = (aliasColumnNames || [])
    .map((n) => quoteChIdentForSelectList(n))
    .filter(Boolean);
  const starSuffix = extras.length > 0 ? `, ${extras.join(', ')}` : '';
  return `SELECT *${starSuffix} FROM ${qualifiedTable}`;
}
182+
161183
/**
162184
* Build the SQL expression for the cube source.
163185
*
@@ -171,9 +193,10 @@ function filtersToSqlConditions(filters) {
171193
* @param {string|null} partition - Partition value
172194
* @param {boolean} isInternal - Whether the table is in internalTables
173195
* @param {Array<{ column: string, operator: string, value: * }>} [filters]
196+
* @param {string[]} [aliasColumnNames] - ClickHouse ALIAS columns to list after SELECT *
174197
* @returns {{ sql_table?: string, sql?: string }}
175198
*/
176-
function buildCubeSource(schema, table, partition, isInternal, filters) {
199+
function buildCubeSource(schema, table, partition, isInternal, filters, aliasColumnNames = []) {
177200
const qualifiedTable = schema ? `${schema}.${table}` : table;
178201
const conditions = [];
179202

@@ -187,7 +210,12 @@ function buildCubeSource(schema, table, partition, isInternal, filters) {
187210

188211
if (conditions.length > 0) {
189212
return {
190-
sql: `SELECT * FROM ${qualifiedTable} WHERE ${conditions.join(' AND ')}`,
213+
sql: `${formatSelectStarWithAliasColumns(qualifiedTable, aliasColumnNames)} WHERE ${conditions.join(' AND ')}`,
214+
};
215+
}
216+
if (aliasColumnNames && aliasColumnNames.length > 0) {
217+
return {
218+
sql: formatSelectStarWithAliasColumns(qualifiedTable, aliasColumnNames),
191219
};
192220
}
193221
return { sql_table: qualifiedTable };
@@ -673,6 +701,7 @@ function buildRawCube(profiledTable, options) {
673701
primaryKeys = [],
674702
cubeName: cubeNameOverride,
675703
filters = [],
704+
aliasColumnNames = [],
676705
} = options;
677706
const arrayJoinGroups = nestedFilters.map((nf) => nf.group);
678707

@@ -681,7 +710,7 @@ function buildRawCube(profiledTable, options) {
681710
const cubeName = cubeNameOverride || sanitizeCubeName(table);
682711
const isInternal = internalTables.includes(table);
683712

684-
const source = buildCubeSource(schema, table, partition, isInternal, filters);
713+
const source = buildCubeSource(schema, table, partition, isInternal, filters, aliasColumnNames);
685714

686715
const { dimensions, measures, mapKeysDiscovered, columnsProfiled, columnsSkipped } =
687716
processColumns(profiledTable.columns, {
@@ -851,6 +880,7 @@ function buildArrayJoinCube(profiledTable, arrayJoinGroups, rawCube, options) {
851880
partition = null,
852881
internalTables = [],
853882
nestedFilters = [],
883+
aliasColumnNames = [],
854884
} = options;
855885

856886
const schema = profiledTable.database;
@@ -925,17 +955,25 @@ function buildArrayJoinCube(profiledTable, arrayJoinGroups, rawCube, options) {
925955
// The ARRAY JOIN aliases must also be in the SELECT so they're visible
926956
// when Cube.js wraps this in a subquery.
927957
const selectParts = [];
958+
const basePhysicalNames = new Set();
928959
for (const [colName, colData] of profiledTable.columns) {
929960
// Skip ALL nested/grouped columns — they're Array types and can't be
930961
// used directly in SQL. ARRAY JOIN group children are added below as
931962
// scalar alias names. Other groups (e.g. location.*) are excluded since
932963
// they don't have ARRAY JOIN expansion in this cube.
933964
if (colData.columnType === ColumnType.GROUPED) continue;
934965
if (colData.columnType === ColumnType.NESTED) continue;
966+
basePhysicalNames.add(colName);
935967
// Only backtick-quote names with dots or special chars; simple names stay unquoted
936968
const needsQuote = /[^a-zA-Z0-9_]/.test(colName);
937969
selectParts.push(needsQuote ? ` \`${colName}\`` : ` ${colName}`);
938970
}
971+
// ALIAS columns may be omitted from the profiler map but must appear in SELECT
972+
for (const aliasName of aliasColumnNames) {
973+
if (basePhysicalNames.has(aliasName)) continue;
974+
const needsQuote = /[^a-zA-Z0-9_]/.test(aliasName);
975+
selectParts.push(needsQuote ? ` \`${aliasName}\`` : ` ${aliasName}`);
976+
}
939977
// Add ARRAY JOIN alias names (scalar after JOIN) so they project into
940978
// Cube.js subquery scope. Use the alias name only (not "x AS y" again).
941979
for (const [, cols] of groupColumns) {
@@ -950,7 +988,7 @@ function buildArrayJoinCube(profiledTable, arrayJoinGroups, rawCube, options) {
950988
}
951989
sql = `SELECT\n${selectParts.join(',\n')}\nFROM ${schema}.${table}\nLEFT ARRAY JOIN\n${ajParts.join(',\n')}`;
952990
} else {
953-
sql = `SELECT * FROM ${schema}.${table}`;
991+
sql = formatSelectStarWithAliasColumns(`${schema}.${table}`, aliasColumnNames);
954992
}
955993

956994
// Collect WHERE conditions — use aliased names (dots → underscores)
@@ -1105,6 +1143,7 @@ function buildArrayJoinCube(profiledTable, arrayJoinGroups, rawCube, options) {
11051143
* @param {Array<{column: string, alias: string}>} [options.arrayJoinColumns] - Columns for ARRAY JOIN
11061144
* @param {number} [options.maxMapKeys] - Max Map keys per column (default 500)
11071145
* @param {string[]} [options.primaryKeys] - Primary key column names
1146+
* @param {string[]} [options.aliasColumnNames] - ClickHouse ALIAS columns to append after SELECT *
11081147
* @returns {{
11091148
* cubes: object[],
11101149
* summary: {
@@ -1232,7 +1271,11 @@ export function buildCubes(profiledTable, options = {}) {
12321271
legacyCube.name = sanitizeCubeName(`${profiledTable.table}_${ajDef.alias}`);
12331272
const qualifiedTable = `${profiledTable.database}.${profiledTable.table}`;
12341273
const isInternal = (options.internalTables || []).includes(profiledTable.table);
1235-
let legacySql = `SELECT *, ${ajDef.column} AS ${ajDef.alias} FROM ${qualifiedTable} LEFT ARRAY JOIN ${ajDef.column} AS ${ajDef.alias}`;
1274+
const aliasExtra = (options.aliasColumnNames || [])
1275+
.map((n) => quoteChIdentForSelectList(n))
1276+
.filter(Boolean);
1277+
const aliasPrefix = aliasExtra.length > 0 ? `, ${aliasExtra.join(', ')}` : '';
1278+
let legacySql = `SELECT *${aliasPrefix}, ${ajDef.column} AS ${ajDef.alias} FROM ${qualifiedTable} LEFT ARRAY JOIN ${ajDef.column} AS ${ajDef.alias}`;
12361279
if (isInternal && options.partition) {
12371280
legacySql += ` WHERE partition = '${options.partition}'`;
12381281
}

services/cubejs/src/utils/smart-generation/yamlGenerator.js

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,54 @@ function sqlToJsTemplate(sql) {
170170
return js;
171171
}
172172

173+
/**
 * Convert `{CUBE}` / `{measure}` refs the same way as sqlToJsTemplate, without escaping.
 *
 * Any `{…}` not already preceded by `$` becomes `${…}`; existing `${…}` is left alone.
 *
 * @param {string} sql
 * @returns {string}
 */
function cubeSqlRefsToJsInterpolation(sql) {
  return sql.replace(/(?<!\$)\{([^}]+)\}/g, '${$1}');
}

/**
 * Cube base `sql` / `sql_table` — use JSON.stringify whenever possible so
 * ClickHouse `` `col` `` appears as real backticks in the file (not `\\``).
 * A template literal is needed when the SQL embeds `{CUBE}` / `{FILTER_PARAMS…}`
 * or already carries a `${…}` interpolation: emitted as a plain string, such an
 * interpolation would never be evaluated. This matches emitSqlForJsModel,
 * which also switches to a template literal whenever `${` is present.
 *
 * @param {string} sql
 * @returns {boolean} false for empty or non-string input
 */
function cubeBaseSqlNeedsTemplateLiteral(sql) {
  if (!sql || typeof sql !== 'string') return false;
  return /\{CUBE\}/.test(sql) || /\{FILTER_PARAMS/.test(sql) || sql.includes('${');
}

/**
 * Emit cube-level `sql` or `sql_table` for generated JS models.
 *
 * @param {string} sql
 * @returns {string} JS expression source: a template literal when
 *   interpolation is required, otherwise a JSON string literal
 */
function emitCubeBaseSqlForJsModel(sql) {
  if (cubeBaseSqlNeedsTemplateLiteral(sql)) {
    return `\`${sqlToJsTemplate(sql)}\``;
  }
  return JSON.stringify(sql);
}

/**
 * Emit dimension / measure / segment sql in generated JS: JSON.stringify when
 * there is no `${…}` after Cube ref conversion; otherwise template literal.
 *
 * @param {string} sql
 * @returns {string} JS expression source (e.g. `"SELECT …"` or `` `…${CUBE}…` ``)
 */
function emitSqlForJsModel(sql) {
  const withRefs = cubeSqlRefsToJsInterpolation(sql);
  if (/\$\{/.test(withRefs)) {
    // The template path restarts from the raw sql: sqlToJsTemplate does its
    // own ref conversion plus template-literal escaping.
    return `\`${sqlToJsTemplate(sql)}\``;
  }
  return JSON.stringify(withRefs);
}
220+
173221
/**
174222
* Serialize a meta object as a JS object literal string.
175223
* @param {object} meta
@@ -211,11 +259,11 @@ export function generateJs(cubeDefinitions) {
211259

212260
lines.push(`cube(\`${formatted.name}\`, {`);
213261

214-
// Source
262+
// Source — cube-level SQL: prefer JSON.stringify so CH `` `col` `` is not escaped as \\`
215263
if (formatted.sql_table) {
216-
lines.push(` sql_table: \`${escapeTemplateLiteral(formatted.sql_table)}\`,`);
264+
lines.push(` sql_table: ${emitCubeBaseSqlForJsModel(formatted.sql_table)},`);
217265
} else if (formatted.sql) {
218-
lines.push(` sql: \`${sqlToJsTemplate(formatted.sql)}\`,`);
266+
lines.push(` sql: ${emitCubeBaseSqlForJsModel(formatted.sql)},`);
219267
}
220268

221269
// Cube-level scalar properties
@@ -238,7 +286,7 @@ export function generateJs(cubeDefinitions) {
238286
lines.push(' dimensions: {');
239287
for (const dim of formatted.dimensions) {
240288
lines.push(` ${dim.name}: {`);
241-
lines.push(` sql: \`${sqlToJsTemplate(dim.sql)}\`,`);
289+
lines.push(` sql: ${emitSqlForJsModel(dim.sql)},`);
242290
lines.push(` type: \`${dim.type}\`,`);
243291
if (dim.description) {
244292
lines.push(` description: ${JSON.stringify(dim.description)},`);
@@ -276,7 +324,7 @@ export function generateJs(cubeDefinitions) {
276324
lines.push(' measures: {');
277325
for (const m of formatted.measures) {
278326
lines.push(` ${m.name}: {`);
279-
lines.push(` sql: \`${sqlToJsTemplate(m.sql)}\`,`);
327+
lines.push(` sql: ${emitSqlForJsModel(m.sql)},`);
280328
lines.push(` type: \`${m.type}\`,`);
281329
if (m.description) {
282330
lines.push(` description: ${JSON.stringify(m.description)},`);
@@ -326,7 +374,7 @@ export function generateJs(cubeDefinitions) {
326374
lines.push(' segments: {');
327375
for (const seg of formatted.segments) {
328376
lines.push(` ${seg.name}: {`);
329-
lines.push(` sql: \`${sqlToJsTemplate(seg.sql)}\`,`);
377+
lines.push(` sql: ${emitSqlForJsModel(seg.sql)},`);
330378
if (seg.title) {
331379
lines.push(` title: ${JSON.stringify(seg.title)},`);
332380
}

0 commit comments

Comments
 (0)