Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
strategy:
fail-fast: false
matrix:
solution: [data.table, collapse, dplyr, pandas, pydatatable, spark, juliadf, juliads, polars, arrow, duckdb, duckdb-latest, datafusion]
solution: [data.table, collapse, dplyr, pandas, pydatatable, spark, juliadf, juliads, polars, arrow, duckdb, duckdb-latest, datafusion, clickhouse]
name: Regression Tests solo solutions
runs-on: ubuntu-20.04
env:
Expand Down Expand Up @@ -54,6 +54,9 @@ jobs:
shell: bash
run: sudo swapoff -a

- name: Setup tmate session
uses: mxschmitt/action-tmate@v3

- name: Run mini GroupBy benchmark
shell: bash
run: |
Expand Down
2 changes: 0 additions & 2 deletions _benchplot/benchplot-dict.R
Original file line number Diff line number Diff line change
Expand Up @@ -490,8 +490,6 @@ join.data.exceptions = {list(
"juliads" = {list(
)},
"clickhouse" = {list(
"out of memory" = c("J1_1e9_NA_0_0", # q1 r2 #169
"J1_1e9_NA_5_0","J1_1e9_NA_0_1") # q1 r1
)},
"polars" = {list(
"out of memory" = c("J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1")
Expand Down
3 changes: 1 addition & 2 deletions _launcher/launcher.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ file.ext = function(x) {
x,
"collapse"=, "data.table"=, "dplyr"=, "h2o"=, "arrow"=, "duckdb"="R", "duckdb-latest"="R",
"pandas"=, "spark"=, "pydatatable"=, "modin"=, "dask"=, "datafusion"=, "polars"="py",
"clickhouse"="sql",
"juliadf"="jl", "juliads"="jl",
"clickhouse"="sh", "juliadf"="jl", "juliads"="jl",
)
if (is.null(ans)) stop(sprintf("solution %s does not have file extension defined in file.ext helper function", x))
ans
Expand Down
3 changes: 1 addition & 2 deletions _launcher/solution.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@ file.ext = function(x) {
x,
"collapse"=, "data.table"=, "dplyr"=, "h2o"=, "arrow"=, "duckdb"="R", "duckdb-latest"="R",
"pandas"="py", "spark"=, "pydatatable"=, "modin"=, "dask"=, "datafusion"=, "polars"="py",
"clickhouse"="sql",
"juliadf"="jl", "juliads"="jl"
"clickhouse"="sh", "juliadf"="jl", "juliads"="jl"
)
if (is.null(ans)) stop(sprintf("solution %s does not have file extension defined in file.ext helper function", x))
ans
Expand Down
8 changes: 2 additions & 6 deletions _utils/install_all_solutions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@ def install_all_solutions():
with open(SOLUTIONS_FILENAME, newline="") as solutions_file:
solutions = csv.DictReader(solutions_file, delimiter=',')
for row in solutions:
if row['solution'] == "clickhouse":
continue
elif row['solution'] == "data.table":
if row['solution'] == "data.table":
install_solutions.add("datatable")
else:
install_solutions.add(row['solution'])
Expand All @@ -44,9 +42,7 @@ def install_all_solutions():
if solution.strip() == "all":
install_all_solutions()
else:
if solution == "clickhouse":
continue
elif solution == "data.table":
if solution == "data.table":
install_solution("datatable")
else:
install_solution(solution)
Expand Down
2 changes: 1 addition & 1 deletion _utils/prep_solutions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
SOLUTIONS_FILENAME = "_control/solutions.csv"
RUN_CONF_FILENAME = "run.conf"

SKIPPED_SOLUTIONS = ["clickhouse"]
SKIPPED_SOLUTIONS = []


def print_usage():
Expand Down
2 changes: 1 addition & 1 deletion clickhouse/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
23.9.1.1854
23.10.2.13
49 changes: 46 additions & 3 deletions clickhouse/ch.sh
Original file line number Diff line number Diff line change
@@ -1,16 +1,59 @@
# Report whether the clickhouse-server package is installed.
# Asks dpkg-query for the package's abbreviated status and succeeds only
# when that status starts with "i". dpkg-query's own errors (for example
# an unknown package) are silenced and simply yield a non-zero return.
# Returns: 0 if the status starts with "i", 1 otherwise.
ch_installed() {
  local pkg_status
  pkg_status=$(dpkg-query -Wf'${db:Status-abbrev}' clickhouse-server 2>/dev/null)
  [[ "$pkg_status" == i* ]]
}

# Probe whether clickhouse-server answers queries.
# Runs a trivial "SELECT 0" through clickhouse-client (all client output is
# discarded) and maps the client's exit status:
#   0    -> server is active, return 0
#   210  -> return 1 silently (presumably ClickHouse's NETWORK_ERROR, i.e.
#           the server is not reachable -- confirm against the error list)
#   else -> report the unexpected status on stderr, return 1
ch_active() {
  local status
  clickhouse-client --query="SELECT 0;" > /dev/null 2>&1
  status=$?
  case "$status" in
    0)
      return 0
      ;;
    210)
      return 1
      ;;
    *)
      echo "Unexpected return code from clickhouse-client: $status" >&2
      return 1
      ;;
  esac
}

# Wait for clickhouse-server to come up, then confirm it answers queries.
# Polls localhost:8123 with wget up to 60 times, pausing one second after
# every attempt that does not return exactly "Ok.". Whether or not the
# endpoint ever answered, the final verdict is delegated to ch_active.
# Returns: the exit status of ch_active.
ch_wait() {
  local attempt
  for ((attempt = 1; attempt <= 60; attempt++)); do
    if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]; then
      break
    fi
    sleep 1
  done
  ch_active
}

# Start the clickhouse-server service and block until it is ready.
# The service is started exactly once; readiness is then established by
# ch_wait (which polls the server) rather than by a fixed-length sleep
# followed by a second, redundant start request.
# Outputs: a one-line progress marker on stdout.
# Returns: the exit status of ch_wait.
ch_start() {
  echo '# ch_start: starting clickhouse-server'
  sudo service clickhouse-server start
  ch_wait
}

# Stop the clickhouse-server service.
# Prints a progress marker, asks the service manager to stop the server and,
# only when that request succeeds, pauses for 15 seconds (presumably to give
# the server time to shut down -- confirm).
# Returns: the failing status of the stop request, otherwise sleep's status.
ch_stop() {
  printf '%s\n' '# ch_stop: stopping clickhouse-server'
  # A bare "return" propagates the status of the failed stop request.
  sudo service clickhouse-server stop || return
  sleep 15
}
ch_active() {
clickhouse-client --query="SELECT 0;" > /dev/null 2>&1

# Run one benchmark query, materialising its result into the table "ans".
# Globals read:    ON_DISK (1 selects MergeTree, anything else Memory),
#                  RUNNAME (attached to the query as its log_comment),
#                  QUERY (the SELECT to materialise), THREADS.
# Globals written: ENGINE (deliberately left global, as before).
# Outputs: up to three rows of the result on stdout; a diagnostic on stderr
#          when clickhouse-client exits with an unexpected status.
# Returns: 0 when the CREATE TABLE ... AS query succeeded, 1 otherwise.
ch_query() {
  ENGINE=Memory
  if [[ "${ON_DISK:-0}" -eq 1 ]]; then
    ENGINE="MergeTree ORDER BY tuple()"
  fi

  # The force_drop_table flag file is (re)created before each DROP below.
  sudo touch '/var/lib/clickhouse/flags/force_drop_table' && sudo chmod 666 '/var/lib/clickhouse/flags/force_drop_table'
  clickhouse-client --query "DROP TABLE IF EXISTS ans;"

  clickhouse-client --log_comment "${RUNNAME}" --query "CREATE TABLE ans ENGINE = ${ENGINE} AS ${QUERY} SETTINGS max_insert_threads=${THREADS}, max_threads=${THREADS};"
  local ret=$?
  if [[ $ret -ne 0 ]]; then
    # 210 is treated as an expected failure and is not reported.
    if [[ $ret -ne 210 ]]; then
      echo "Unexpected return code from clickhouse-client: $ret" >&2
    fi
    return 1
  fi

  # Only reached on success: previously an unconditional return made the
  # preview and the cleanup below dead code, leaving "ans" behind.
  clickhouse-client --query "SELECT * FROM ans LIMIT 3;"
  sudo touch '/var/lib/clickhouse/flags/force_drop_table' && sudo chmod 666 '/var/lib/clickhouse/flags/force_drop_table'
  clickhouse-client --query "DROP TABLE ans;"

  # Preview and cleanup are best-effort; the status reflects the query itself.
  return 0
}

# Export the timing record of the most recent finished query tagged with
# RUNNAME from system.query_log into clickhouse/log/<RUNNAME>.csv.
# Globals read: RUN, TASK, SRC_DATANAME, QUESTION, FUNCTION, RUNNAME.
# Outputs: the CSV file (with a header row); a diagnostic on stderr when
#          clickhouse-client exits with an unexpected status.
# Returns: 0 when the export query succeeded, 1 otherwise.
# NOTE(review): the on_disk column is hard-coded to 1 in the query below;
# confirm that is intended for in-memory runs as well.
ch_logrun() {
  # Flush first so the query log contains the run that just finished.
  clickhouse-client --query "SYSTEM FLUSH LOGS;"
  # The redirection target is quoted so a RUNNAME containing whitespace or
  # glob characters cannot turn into an "ambiguous redirect".
  clickhouse-client --query "SELECT ${RUN} AS run, toUnixTimestamp(now()) AS timestamp, '${TASK}' AS task, '${SRC_DATANAME}' AS data_name, NULL AS in_rows, '${QUESTION}' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, '${FUNCTION}' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk FROM system.query_log WHERE type='QueryFinish' AND log_comment='${RUNNAME}' ORDER BY query_start_time DESC LIMIT 1 FORMAT CSVWithNames;" > "clickhouse/log/${RUNNAME}.csv"
  local ret=$?
  if [[ $ret -eq 0 ]]; then
    return 0
  elif [[ $ret -ne 210 ]]; then
    # 210 is treated as an expected failure and is not reported.
    echo "Unexpected return code from clickhouse-client: $ret" >&2
  fi
  return 1
}

# Execute the current question twice (run 1, then run 2), logging each run.
# Globals read:    TASK, SRC_DATANAME, Q.
# Globals written: RUN (ends at 2) and RUNNAME (rebuilt for every run as
#                  <TASK>_<SRC_DATANAME>_q<Q>_r<RUN>).
# Each run calls ch_query followed by ch_logrun; a failing ch_query does not
# stop the sequence.
# Returns: the exit status of the final ch_logrun.
ch_make_2_runs() {
  for RUN in 1 2; do
    RUNNAME="${TASK}_${SRC_DATANAME}_q${Q}_r${RUN}"
    ch_query
    ch_logrun
  done
}
Loading