Commit 5882b72

[Bench](tools) Add coffee-bench tools for Doris
1 parent 34bab82 commit 5882b72

23 files changed: +1162, -0 lines changed


tools/coffeebench-tools/README.md

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

## Usage

These scripts are used to run the coffee-bench test. Follow the steps below:

### 1. Create tables. Modify `conf/doris-cluster.conf` to specify the Doris cluster info, then run the script below.

    ./bin/create-tables.sh

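The contents of `conf/doris-cluster.conf` are not shown in this commit excerpt; below is a minimal sketch, assuming only the variables the scripts later `source` (`FE_HOST`, `FE_QUERY_PORT`, `USER`, `PASSWORD`, `DB`, plus `S3_ACCESS_KEY`/`S3_SECRET_KEY` for the load step). Every value is a placeholder.

```bash
# conf/doris-cluster.conf -- sketch only; all values below are placeholders
FE_HOST='127.0.0.1'            # Doris FE host
FE_QUERY_PORT=9030             # FE MySQL-protocol query port
USER='root'                    # Doris user
PASSWORD=''                    # Doris password (exported as MYSQL_PWD by the scripts)
DB='coffeebench'               # target database; create-tables.sh creates it if missing
S3_ACCESS_KEY='<access-key>'   # only needed by load-data.sh
S3_SECRET_KEY='<secret-key>'
```
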
### 2. Load coffee-bench data. Use `-s` to set the scale: "500m/1b/5b".

    ./bin/load-data.sh -s 500m

**Note: The data loading process uses S3 data located in the US East (us-east-1) region.**
If you cannot access this region due to network restrictions or other reasons, you can:
1. Use the AWS CLI to copy the data to an S3 bucket in your preferred region (see the sketch below)
2. Download the data locally and use alternative methods to import it into Doris

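A hedged sketch of option 1; `s3://happen-test` is the source bucket used by `bin/load-data.sh`, while the destination bucket and target region are placeholders.

```bash
# Copy one dataset into a bucket in your own region, then point the s3() 'uri',
# 's3.endpoint' and 's3.region' settings in load-data.sh at the new location.
aws s3 sync \
    s3://happen-test/fact_sales_500m/ \
    s3://<your-bucket>/fact_sales_500m/ \
    --source-region us-east-1 \
    --region <your-region>
```
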
### 3. Run queries.

    ./bin/run-queries.sh

NOTICE: At present, some coffee-bench queries are rewritten to adapt to Doris' execution framework; this does not affect the correctness of the results.
Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

##############################################################
# This script is used to create coffee-bench tables
##############################################################

set -eo pipefail

ROOT=$(dirname "$0")
ROOT=$(
    cd "${ROOT}"
    pwd
)

CURDIR=${ROOT}

usage() {
    echo "
This script is used to create the coffee-bench tables.
It uses the mysql client to connect to the Doris server specified in the doris-cluster.conf file.
Usage: $0
    "
    exit 1
}

OPTS=$(getopt \
    -n "$0" \
    -o 'h' \
    -- "$@")

eval set -- "${OPTS}"
HELP=0

if [[ $# == 0 ]]; then
    usage
fi

while true; do
    case "$1" in
    -h)
        HELP=1
        shift
        ;;
    --)
        shift
        break
        ;;
    *)
        echo "Internal error"
        exit 1
        ;;
    esac
done

if [[ "${HELP}" -eq 1 ]]; then
    usage
fi

check_prerequest() {
    local CMD=$1
    local NAME=$2
    if ! ${CMD}; then
        echo "${NAME} is missing. This script depends on mysql to create tables in Doris."
        exit 1
    fi
}

check_prerequest "mysql --version" "mysql"

source "${CURDIR}/../conf/doris-cluster.conf"
export MYSQL_PWD=${PASSWORD}

echo "FE_HOST: ${FE_HOST}"
echo "FE_QUERY_PORT: ${FE_QUERY_PORT}"
echo "USER: ${USER}"
echo "DB: ${DB}"

mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -e "CREATE DATABASE IF NOT EXISTS ${DB}"

echo "Run SQLs from ${CURDIR}/../ddl/create-tables.sql"
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" <"${CURDIR}"/../ddl/create-tables.sql

echo "coffee-bench tables have been created"
Lines changed: 165 additions & 0 deletions
@@ -0,0 +1,165 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

##############################################################
# This script is used to load the coffee-bench data set
##############################################################

set -eo pipefail

ROOT=$(dirname "$0")
ROOT=$(
    cd "${ROOT}"
    pwd
)

CURDIR="${ROOT}"

usage() {
    echo "
Usage: $0 <options>
  Optional options:
    -s    scale factor, default is 500m

  Eg.
    $0          load data using the default scale.
    $0 -s 1b    load data with scale 1b.
    "
    exit 1
}

OPTS=$(getopt \
    -n "$0" \
    -o 'hs:' \
    -- "$@")

eval set -- "${OPTS}"

SCALE_FACTOR="500m"
HELP=0

if [[ $# == 0 ]]; then
    usage
fi

while true; do
    case "$1" in
    -h)
        HELP=1
        shift
        ;;
    -s)
        SCALE_FACTOR=$2
        shift 2
        ;;
    --)
        shift
        break
        ;;
    *)
        echo "Internal error"
        exit 1
        ;;
    esac
done

if [[ "${HELP}" -eq 1 ]]; then
    usage
fi

echo "Scale Factor: ${SCALE_FACTOR}"
88+
89+
source "${CURDIR}/../conf/doris-cluster.conf"
90+
export MYSQL_PWD=${PASSWORD:-}
91+
92+
echo "FE_HOST: ${FE_HOST:='127.0.0.1'}"
93+
echo "FE_QUERY_PORT: ${FE_QUERY_PORT:='9030'}"
94+
echo "USER: ${USER:='root'}"
95+
echo "DB: ${DB:='tpch'}"
96+
97+
run_sql() {
98+
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$*"
99+
}
100+
101+
# Load dim_products data from S3
run_sql "INSERT INTO dim_products
SELECT
    COALESCE(record_id, '') AS record_id,
    COALESCE(product_id, '') AS product_id,
    COALESCE(name, '') AS name,
    COALESCE(category, '') AS category,
    COALESCE(subcategory, '') AS subcategory,
    COALESCE(standard_cost, 0.0) AS standard_cost,
    COALESCE(standard_price, 0.0) AS standard_price,
    COALESCE(from_date, '1970-01-01') AS from_date,
    COALESCE(to_date, '9999-12-31') AS to_date
FROM s3(
    'uri' = 's3://happen-test/dim_products/*',
    's3.endpoint' = 's3.us-east-1.amazonaws.com',
    's3.access_key' = '${S3_ACCESS_KEY}',
    's3.secret_key' = '${S3_SECRET_KEY}',
    's3.region' = 'us-east-1',
    'format' = 'parquet'
);"

# Load dim_locations data from S3
run_sql "INSERT INTO dim_locations
SELECT
    COALESCE(record_id, '') AS record_id,
    COALESCE(location_id, '') AS location_id,
    COALESCE(city, '') AS city,
    COALESCE(state, '') AS state,
    COALESCE(country, '') AS country,
    COALESCE(region, '') AS region
FROM s3(
    'uri' = 's3://happen-test/dim_locations/*',
    's3.endpoint' = 's3.us-east-1.amazonaws.com',
    's3.access_key' = '${S3_ACCESS_KEY}',
    's3.secret_key' = '${S3_SECRET_KEY}',
    's3.region' = 'us-east-1',
    'format' = 'parquet'
);"

# Load fact_sales data from S3
run_sql "INSERT INTO fact_sales
SELECT
    COALESCE(order_id, '') AS order_id,
    COALESCE(order_line_id, '') AS order_line_id,
    COALESCE(order_date, '1970-01-01') AS order_date,
    COALESCE(time_of_day, '') AS time_of_day,
    COALESCE(season, '') AS season,
    COALESCE(month, 0) AS month,
    COALESCE(location_id, '') AS location_id,
    COALESCE(region, '') AS region,
    COALESCE(product_name, '') AS product_name,
    COALESCE(quantity, 0) AS quantity,
    COALESCE(sales_amount, 0.0) AS sales_amount,
    COALESCE(discount_percentage, 0) AS discount_percentage,
    COALESCE(product_id, '') AS product_id
FROM s3(
    'uri' = 's3://happen-test/fact_sales_${SCALE_FACTOR}/*',
    's3.endpoint' = 's3.us-east-1.amazonaws.com',
    's3.access_key' = '${S3_ACCESS_KEY}',
    's3.secret_key' = '${S3_SECRET_KEY}',
    's3.region' = 'us-east-1',
    'format' = 'parquet'
);"

echo "data loading finished"
