diff --git a/services/cubejs/src/routes/smartGenerate.js b/services/cubejs/src/routes/smartGenerate.js index e97aa6e7..e40d0772 100644 --- a/services/cubejs/src/routes/smartGenerate.js +++ b/services/cubejs/src/routes/smartGenerate.js @@ -15,6 +15,7 @@ import { deserializeProfile } from '../utils/smart-generation/profileSerializer. import { diffModels, parseCubesFromJs } from '../utils/smart-generation/diffModels.js'; import { loadRules } from '../utils/queryRewrite.js'; import { validateModelSyntax, smokeTestQuery } from '../utils/smart-generation/modelValidator.js'; +import { fetchClickHouseAliasColumnNames } from '../utils/smart-generation/clickHouseAliasColumns.js'; function reorderProfileColumns(profiledTable) { if (!profiledTable?.columns || !(profiledTable.columns instanceof Map)) return profiledTable; @@ -216,7 +217,12 @@ export default async (req, res, cubejs) => { profiledTable = { ...profiledTable, columns: filtered, columnOrder: filteredOrder }; } - // Build cubes + // Build cubes — include ClickHouse ALIAS columns explicitly in cube sql (after SELECT *) + let aliasColumnNames = []; + if (driver) { + aliasColumnNames = await fetchClickHouseAliasColumnNames(driver, schema, table); + } + emitter.emit('building', 'Building cube definitions...', 0.6); const cubeResult = buildCubes(profiledTable, { partition, @@ -227,6 +233,7 @@ export default async (req, res, cubejs) => { cubeName: cubeNameOverride, filters, nestedFilters, + aliasColumnNames, }); // Store filters in cube-level meta for provenance tracking diff --git a/services/cubejs/src/utils/smart-generation/__tests__/clickHouseAliasColumns.test.js b/services/cubejs/src/utils/smart-generation/__tests__/clickHouseAliasColumns.test.js new file mode 100644 index 00000000..82866c14 --- /dev/null +++ b/services/cubejs/src/utils/smart-generation/__tests__/clickHouseAliasColumns.test.js @@ -0,0 +1,34 @@ +import { describe, it } from 'node:test'; +import assert from 'node:assert'; +import { 
fetchClickHouseAliasColumnNames } from '../clickHouseAliasColumns.js'; + +describe('fetchClickHouseAliasColumnNames', () => { + it('returns distinct names from driver rows (ALIAS-only query)', async () => { + const driver = { + query: async (sql) => { + assert.ok(sql.includes("default_kind = 'ALIAS'"), sql); + return [{ name: 'duration_ratio' }, { name: 'other_alias' }]; + }, + }; + const names = await fetchClickHouseAliasColumnNames(driver, 'dev', 'semantic_events'); + assert.deepStrictEqual(names, ['duration_ratio', 'other_alias']); + }); + + it('reads name from alternate row keys', async () => { + const driver = { + query: async () => [{ Name: 'col_a' }], + }; + const names = await fetchClickHouseAliasColumnNames(driver, 'd', 't'); + assert.deepStrictEqual(names, ['col_a']); + }); + + it('returns [] when query throws', async () => { + const driver = { + query: async () => { + throw new Error('no system.columns'); + }, + }; + const names = await fetchClickHouseAliasColumnNames(driver, 'd', 't'); + assert.deepStrictEqual(names, []); + }); +}); diff --git a/services/cubejs/src/utils/smart-generation/__tests__/cubeBuilder.test.js b/services/cubejs/src/utils/smart-generation/__tests__/cubeBuilder.test.js index 7042a0dd..37c17822 100644 --- a/services/cubejs/src/utils/smart-generation/__tests__/cubeBuilder.test.js +++ b/services/cubejs/src/utils/smart-generation/__tests__/cubeBuilder.test.js @@ -55,6 +55,23 @@ describe('cubeBuilder – buildCubes', () => { assert.strictEqual(cubes[0].sql_table, 'test_db.events'); }); + it('should list ClickHouse ALIAS columns after SELECT * when aliasColumnNames is provided', () => { + const { cubes } = buildCubes(table, { aliasColumnNames: ['duration_ratio'] }); + assert.strictEqual(cubes[0].sql, 'SELECT *, duration_ratio FROM test_db.events'); + assert.strictEqual(cubes[0].sql_table, undefined); + }); + + it('should append ALIAS columns to filtered cube sql', () => { + const { cubes } = buildCubes(table, { + aliasColumnNames: 
['duration_ratio'], + filters: [{ column: 'id', operator: '=', value: 'x' }], + }); + assert.strictEqual( + cubes[0].sql, + "SELECT *, duration_ratio FROM test_db.events WHERE id = 'x'" + ); + }); + it('should produce dimensions for string and date columns', () => { const { cubes } = buildCubes(table); const dimNames = cubes[0].dimensions.map((d) => d.name); diff --git a/services/cubejs/src/utils/smart-generation/__tests__/yamlGenerator.test.js b/services/cubejs/src/utils/smart-generation/__tests__/yamlGenerator.test.js index 2c77c2c0..a8861db5 100644 --- a/services/cubejs/src/utils/smart-generation/__tests__/yamlGenerator.test.js +++ b/services/cubejs/src/utils/smart-generation/__tests__/yamlGenerator.test.js @@ -243,4 +243,18 @@ describe('yamlGenerator – generateJs advanced properties', () => { const js = generateJs([cube]); assert.ok(js.includes('${total_amount}'), 'should convert {measure} to ${measure}'); }); + + it('emits cube-level sql with JSON string so ClickHouse backticks are not escaped (no \\\\`)', () => { + const sql = 'SELECT *, `commerce.total`, duration_ratio FROM dev.semantic_events'; + const cube = { + name: 'semantic_events', + sql, + meta: { auto_generated: true }, + dimensions: [], + measures: [], + }; + const js = generateJs([cube]); + assert.ok(js.includes('sql: "SELECT *, `commerce.total`, duration_ratio FROM dev.semantic_events"'), js); + assert.ok(!js.includes('\\`commerce'), 'should not escape inner backticks with backslash'); + }); }); diff --git a/services/cubejs/src/utils/smart-generation/clickHouseAliasColumns.js b/services/cubejs/src/utils/smart-generation/clickHouseAliasColumns.js new file mode 100644 index 00000000..e7fb2827 --- /dev/null +++ b/services/cubejs/src/utils/smart-generation/clickHouseAliasColumns.js @@ -0,0 +1,37 @@ +/** + * Load ALIAS column names from ClickHouse system.columns so generated cube + * `sql` can list them explicitly after SELECT *. 
// --- services/cubejs/src/utils/smart-generation/clickHouseAliasColumns.js ---

/**
 * Pull the column name out of one driver result row.
 *
 * The row is probed under `name`, then `Name`, then `column_name`; the first
 * key whose value is neither null nor undefined wins.
 *
 * @param {object} row - One row returned by driver.query()
 * @returns {*} The value found, or null when `row` is not an object or has none of the keys
 */
function rowColumnName(row) {
  if (!row || typeof row !== 'object') return null;
  return row.name ?? row.Name ?? row.column_name ?? null;
}

/**
 * Escape a value for embedding inside a single-quoted ClickHouse string literal.
 *
 * Backslashes are escaped first: ClickHouse treats `\` as an escape character
 * inside string literals, so doubling quotes alone lets a value such as `x\`
 * or `\' OR 1=1 --` terminate the literal early.
 *
 * @param {*} value - Value to embed (coerced with String())
 * @returns {string} Escaped text, without the surrounding quotes
 */
function escapeChStringLiteral(value) {
  return String(value).replace(/\\/g, '\\\\').replace(/'/g, "''");
}

/**
 * Load ALIAS column names from ClickHouse system.columns so generated cube
 * `sql` can list them explicitly after SELECT *.
 *
 * Only `default_kind = 'ALIAS'` columns are included (not MATERIALIZED/DEFAULT).
 * The lookup is best-effort: any failure of driver.query() is logged with
 * console.warn and reported as an empty list.
 *
 * @param {object} driver - ClickHouse driver with .query(sql)
 * @param {string} database
 * @param {string} table
 * @returns {Promise<string[]>} Distinct, non-empty column names in the order the
 *   driver returned them; [] when an argument is missing, the result is not an
 *   array, or the query throws
 */
export async function fetchClickHouseAliasColumnNames(driver, database, table) {
  if (!driver?.query || !database || !table) return [];
  const db = escapeChStringLiteral(database);
  const tbl = escapeChStringLiteral(table);
  try {
    const rows = await driver.query(
      `SELECT name FROM system.columns ` +
        `WHERE database = '${db}' AND table = '${tbl}' ` +
        `AND default_kind = 'ALIAS' ` +
        `ORDER BY position`
    );
    const list = Array.isArray(rows) ? rows : [];
    const names = list
      .map((r) => rowColumnName(r))
      .filter((n) => typeof n === 'string' && n.length > 0);
    // Set keeps first-seen order, so the ORDER BY position ordering survives dedupe.
    return [...new Set(names)];
  } catch (err) {
    // `err` may be a non-Error throwable (even null); never let logging itself throw.
    console.warn(`[smartGenerate] ALIAS column lookup failed (non-fatal): ${err?.message ?? err}`);
    return [];
  }
}

// --- services/cubejs/src/utils/smart-generation/cubeBuilder.js ---

/**
 * Quote a column identifier for use in generated SELECT lists (simple names unquoted).
 *
 * Names made only of ASCII letters, digits and underscores (not starting with a
 * digit) are returned as-is. Anything else is wrapped in backticks, with embedded
 * backslashes and backticks backslash-escaped so the quoted identifier still
 * refers to the original column instead of a silently altered name.
 *
 * @param {string} name - Raw column name
 * @returns {string|null} Identifier text ready for a SELECT list, or null when
 *   `name` is empty or not a string
 */
function quoteChIdentForSelectList(name) {
  if (!name || typeof name !== 'string') return null;
  if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) return name;
  const escaped = name.replace(/\\/g, '\\\\').replace(/`/g, '\\`');
  return `\`${escaped}\``;
}
db.table + * @param {string[]} [aliasColumnNames] + * @returns {string} + */ +function formatSelectStarWithAliasColumns(qualifiedTable, aliasColumnNames) { + const extras = (aliasColumnNames || []) + .map((n) => quoteChIdentForSelectList(n)) + .filter(Boolean); + const starSuffix = extras.length > 0 ? `, ${extras.join(', ')}` : ''; + return `SELECT *${starSuffix} FROM ${qualifiedTable}`; +} + /** * Build the SQL expression for the cube source. * @@ -171,9 +193,10 @@ function filtersToSqlConditions(filters) { * @param {string|null} partition - Partition value * @param {boolean} isInternal - Whether the table is in internalTables * @param {Array<{ column: string, operator: string, value: * }>} [filters] + * @param {string[]} [aliasColumnNames] - ClickHouse ALIAS columns to list after SELECT * * @returns {{ sql_table?: string, sql?: string }} */ -function buildCubeSource(schema, table, partition, isInternal, filters) { +function buildCubeSource(schema, table, partition, isInternal, filters, aliasColumnNames = []) { const qualifiedTable = schema ? 
`${schema}.${table}` : table; const conditions = []; @@ -187,7 +210,12 @@ function buildCubeSource(schema, table, partition, isInternal, filters) { if (conditions.length > 0) { return { - sql: `SELECT * FROM ${qualifiedTable} WHERE ${conditions.join(' AND ')}`, + sql: `${formatSelectStarWithAliasColumns(qualifiedTable, aliasColumnNames)} WHERE ${conditions.join(' AND ')}`, + }; + } + if (aliasColumnNames && aliasColumnNames.length > 0) { + return { + sql: formatSelectStarWithAliasColumns(qualifiedTable, aliasColumnNames), }; } return { sql_table: qualifiedTable }; @@ -673,6 +701,7 @@ function buildRawCube(profiledTable, options) { primaryKeys = [], cubeName: cubeNameOverride, filters = [], + aliasColumnNames = [], } = options; const arrayJoinGroups = nestedFilters.map((nf) => nf.group); @@ -681,7 +710,7 @@ function buildRawCube(profiledTable, options) { const cubeName = cubeNameOverride || sanitizeCubeName(table); const isInternal = internalTables.includes(table); - const source = buildCubeSource(schema, table, partition, isInternal, filters); + const source = buildCubeSource(schema, table, partition, isInternal, filters, aliasColumnNames); const { dimensions, measures, mapKeysDiscovered, columnsProfiled, columnsSkipped } = processColumns(profiledTable.columns, { @@ -851,6 +880,7 @@ function buildArrayJoinCube(profiledTable, arrayJoinGroups, rawCube, options) { partition = null, internalTables = [], nestedFilters = [], + aliasColumnNames = [], } = options; const schema = profiledTable.database; @@ -925,6 +955,7 @@ function buildArrayJoinCube(profiledTable, arrayJoinGroups, rawCube, options) { // The ARRAY JOIN aliases must also be in the SELECT so they're visible // when Cube.js wraps this in a subquery. const selectParts = []; + const basePhysicalNames = new Set(); for (const [colName, colData] of profiledTable.columns) { // Skip ALL nested/grouped columns — they're Array types and can't be // used directly in SQL. 
ARRAY JOIN group children are added below as @@ -932,10 +963,17 @@ function buildArrayJoinCube(profiledTable, arrayJoinGroups, rawCube, options) { // they don't have ARRAY JOIN expansion in this cube. if (colData.columnType === ColumnType.GROUPED) continue; if (colData.columnType === ColumnType.NESTED) continue; + basePhysicalNames.add(colName); // Only backtick-quote names with dots or special chars; simple names stay unquoted const needsQuote = /[^a-zA-Z0-9_]/.test(colName); selectParts.push(needsQuote ? ` \`${colName}\`` : ` ${colName}`); } + // ALIAS columns may be omitted from the profiler map but must appear in SELECT + for (const aliasName of aliasColumnNames) { + if (basePhysicalNames.has(aliasName)) continue; + const needsQuote = /[^a-zA-Z0-9_]/.test(aliasName); + selectParts.push(needsQuote ? ` \`${aliasName}\`` : ` ${aliasName}`); + } // Add ARRAY JOIN alias names (scalar after JOIN) so they project into // Cube.js subquery scope. Use the alias name only (not "x AS y" again). 
for (const [, cols] of groupColumns) { @@ -950,7 +988,7 @@ function buildArrayJoinCube(profiledTable, arrayJoinGroups, rawCube, options) { } sql = `SELECT\n${selectParts.join(',\n')}\nFROM ${schema}.${table}\nLEFT ARRAY JOIN\n${ajParts.join(',\n')}`; } else { - sql = `SELECT * FROM ${schema}.${table}`; + sql = formatSelectStarWithAliasColumns(`${schema}.${table}`, aliasColumnNames); } // Collect WHERE conditions — use aliased names (dots → underscores) @@ -1105,6 +1143,7 @@ function buildArrayJoinCube(profiledTable, arrayJoinGroups, rawCube, options) { * @param {Array<{column: string, alias: string}>} [options.arrayJoinColumns] - Columns for ARRAY JOIN * @param {number} [options.maxMapKeys] - Max Map keys per column (default 500) * @param {string[]} [options.primaryKeys] - Primary key column names + * @param {string[]} [options.aliasColumnNames] - ClickHouse ALIAS columns to append after SELECT * * @returns {{ * cubes: object[], * summary: { @@ -1232,7 +1271,11 @@ export function buildCubes(profiledTable, options = {}) { legacyCube.name = sanitizeCubeName(`${profiledTable.table}_${ajDef.alias}`); const qualifiedTable = `${profiledTable.database}.${profiledTable.table}`; const isInternal = (options.internalTables || []).includes(profiledTable.table); - let legacySql = `SELECT *, ${ajDef.column} AS ${ajDef.alias} FROM ${qualifiedTable} LEFT ARRAY JOIN ${ajDef.column} AS ${ajDef.alias}`; + const aliasExtra = (options.aliasColumnNames || []) + .map((n) => quoteChIdentForSelectList(n)) + .filter(Boolean); + const aliasPrefix = aliasExtra.length > 0 ? 
`, ${aliasExtra.join(', ')}` : ''; + let legacySql = `SELECT *${aliasPrefix}, ${ajDef.column} AS ${ajDef.alias} FROM ${qualifiedTable} LEFT ARRAY JOIN ${ajDef.column} AS ${ajDef.alias}`; if (isInternal && options.partition) { legacySql += ` WHERE partition = '${options.partition}'`; } diff --git a/services/cubejs/src/utils/smart-generation/yamlGenerator.js b/services/cubejs/src/utils/smart-generation/yamlGenerator.js index 48793ba9..0395d136 100644 --- a/services/cubejs/src/utils/smart-generation/yamlGenerator.js +++ b/services/cubejs/src/utils/smart-generation/yamlGenerator.js @@ -170,6 +170,54 @@ function sqlToJsTemplate(sql) { return js; } +/** + * Convert `{CUBE}` / `{measure}` refs the same way as sqlToJsTemplate, without escaping. + */ +function cubeSqlRefsToJsInterpolation(sql) { + return sql.replace(/(?