
Commit f657006

Improve add-db-to-osd.sh
Signed-off-by: Anthony D'Atri <[email protected]>
1 parent 3a83bfa


add-db-to-osd.sh

Lines changed: 62 additions & 45 deletions
@@ -1,15 +1,20 @@
 #!/bin/bash
 # Brett Kelly Oct 2021
+# Anthony D'Atri 2025-04-15
 # 45Drives
-# Version 1.3 stable
+# Version 1.4 stable

 usage() { # Help
 cat << EOF
 Usage:
     [-b] Block DB size. Required. Allowed suffixes K,M,G,T
-    [-d] Device to use as db. Required. Aliased Device name should be used /dev/X-Y
-    [-f] Bypass osd per db warning
-    [-o] OSDs to add db to. Required. Comma separated list of osd.id. <0,1,2,3>
+    [-d] Device to use for DB+WAL. Required. The aliased device name should be used: /dev/X-Y
+    [-f] Bypass the OSD per DB warning
+    [-o] OSDs to which to add DB+WAL. Required. Comma separated list of osd.id. <0,1,2,3>
+    [-r] Number of OSDs to share a given WAL+DB offload device; the default is 5, which is
+         appropriate for SAS/SATA SSD offload devices. A value of 10 is usually reasonable
+         for NVMe offload devices, but note that all of these OSDs will fail when the
+         offload device fails.
     [-h] Displays this message
 EOF
 exit 0
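
A usage sketch, for orientation only (the device path and OSD IDs here are hypothetical, not part of the commit): give OSDs 0, 1, and 2 each a 64G DB+WAL LV carved from one aliased NVMe offload device, allowing up to 10 OSDs to share it.

    # Hypothetical invocation:
    ./add-db-to-osd.sh -b 64G -d /dev/1-1 -o 0,1,2 -r 10
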
@@ -36,25 +41,25 @@ add_lv_tags(){
 lvchange --addtag "ceph.type=db" $DB_LV_DEVICE
 }

-check_dependancies(){
-    for i in "${!SCRIPT_DEPENDANCIES[@]}"; do
-        if ! command -v ${SCRIPT_DEPENDANCIES[i]} >/dev/null 2>&1;then
-            echo "cli utility: ${SCRIPT_DEPENDANCIES[i]} is not installed"
-            echo "jq, and bc are required"
+check_dependencies(){
+    for i in "${!SCRIPT_DEPENDENCIES[@]}"; do
+        if ! command -v ${SCRIPT_DEPENDENCIES[i]} >/dev/null 2>&1; then
+            echo "The required utility ${SCRIPT_DEPENDENCIES[i]} is not installed"
+            echo "The jq and bc utilities are required"
             exit 1
         fi
     done
 }

-# if encountering any error quit, so to not make a mess
+# Quit if we encounter any error, so as not to make anything worse
 set -e

-SCRIPT_DEPENDANCIES=(bc jq)
+SCRIPT_DEPENDENCIES=(bc jq)
 FORCE="false"
 PHYSICAL_EXTENT_SIZE_BYTES=4194304
 OSD_PER_DB_LIMIT=5

-while getopts 'b:fo:d:h' OPTION; do
+while getopts 'b:fo:d:r:h' OPTION; do
     case ${OPTION} in
     b)
         BLOCK_DB_SIZE=${OPTARG}
@@ -74,6 +79,11 @@ while getopts 'b:fo:d:h' OPTION; do
         OSD_LIST_=${OPTARG}
         IFS=',' read -r -a OSD_LIST <<< "$OSD_LIST_"
         ;;
+    r)
+        OSD_PER_DB_LIMIT=${OPTARG}
+        case $OSD_PER_DB_LIMIT in
+            ''|*[!0-9]*) echo "OSDs per DB device ratio must be an integer" ; exit 1 ;;
+        esac ;;
     h)
         usage
         ;;
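
The ''|*[!0-9]* glob in the new r) handler rejects an empty string or any string containing a non-digit. The same idiom can be tested standalone; a minimal sketch (the is_uint helper is illustrative, not part of the commit):

    # Sketch of the validation idiom used by the r) case above
    is_uint() {
        case "$1" in
            ''|*[!0-9]*) return 1 ;;  # empty, or contains a non-digit
            *) return 0 ;;
        esac
    }
    is_uint 10 && echo "10 is accepted"
    is_uint 5x || echo "5x is rejected"
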
@@ -86,10 +96,8 @@ if [ -z $OSD_LIST ] || [ -z $DB_DEVICE ] || [ -z $BLOCK_DB_SIZE_BYTES ]; then
     exit 1
 fi

-# If the db device given is a linux sd device then warn if you want to continue
-
-# Check cli depandancies
-check_dependancies
+# Check CLI dependencies
+check_dependencies

 BLOCK_DB_SIZE_EXTENTS=$(bc <<< "$BLOCK_DB_SIZE_BYTES/$PHYSICAL_EXTENT_SIZE_BYTES")
 OSD_COUNT="${#OSD_LIST[@]}"
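
For reference, the extent arithmetic above divides the requested DB size in bytes by the 4 MiB (4194304-byte) physical extent size. A worked example, assuming a hypothetical 64 GiB (68719476736-byte) DB size:

    # Hypothetical worked example of the extent arithmetic:
    echo "68719476736/4194304" | bc    # 64 GiB DB -> 16384 physical extents
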
@@ -101,44 +109,44 @@ DB_DEVICE_SIZE_BYTES=$(blockdev --getsize64 $DB_DEVICE)
 # check with wipefs that device has LVM data present
 DB_DEVICE_SIGNATURE=$(wipefs "$DB_DEVICE" --json | jq -r '.signatures | .[0].type // empty')
 # If this is empty the disk is assumed new.
-# If this is LVM2_member the disk is assumed to already have a db lv present it
+# If this is LVM2_member the disk is assumed to already have a DB LV present on it
 # If anything else the disk is assumed to have something else on it and should be wiped. Quit with warning
-if [ -z "$LVM_JSON_DEVICE" ] || [ "$DB_DEVICE_SIGNATURE" == "LVM2_member" ];then
+if [ -z "$LVM_JSON_DEVICE" ] || [ "$DB_DEVICE_SIGNATURE" == "LVM2_member" ]; then
     :
 else
-    echo "Disk is not empty nor a LVM device, wipe device first and run again"
+    echo "Device is neither empty nor an LV device. Wipe the device and run again"
     exit 1
 fi

-# Get PVS info for the specific disk we want
+# Get PV info for the specific disk we want
 LVM_JSON=$(pvs --units B --nosuffix -o name,vg_name,lv_name,lv_count,lvsize,vg_free --reportformat json )
 LVM_JSON_DEVICE=$(echo $LVM_JSON | jq --arg disk "$DB_DEVICE" '.[] |.[].pv | .[] | select(.pv_name==$disk)')

-# Check we are using the correct device name
+# Ensure that we are using the correct device
 # if DB_DEVICE_SIGNATURE is LVM2_member and LVM_JSON_DEVICE is empty, then the wrong disk name was used (sd name instead of alias). Quit with warning
 if [ "$DB_DEVICE_SIGNATURE" == "LVM2_member" ] && [ -z "$LVM_JSON_DEVICE" ];then
-    echo "WARNING: device selected ($DB_DEVICE) has a LVM signature, but could not get LVM info."
-    echo "Wrong disk name was most likely provided, use the device alias name instead of the linux device name"
+    echo "WARNING: device selected ($DB_DEVICE) has an LVM signature, but could not get LVM info."
+    echo "The wrong device name was most likely provided; use the device alias name instead of the Linux device name"
     exit 1
 fi

-# are we using an exitsing db device or a new device, if LVM_JSON_DEVICE is empty, and DB_DEVICE_SIGNATURE is empty we have a new disk
+# Are we using an existing DB device or a new device? If LVM_JSON_DEVICE and DB_DEVICE_SIGNATURE are both empty we have an empty device
 if [ -z "$LVM_JSON_DEVICE" ] && [ -z "$DB_DEVICE_SIGNATURE" ];then
     DB_VG_NAME="ceph-$(uuidgen)"
 else
-    # if not how do we get db_VG ? inspect from device given
+    # If not, derive the DB VG from the given device
     DB_VG_NAME="$(echo $LVM_JSON_DEVICE | jq -r '.vg_name' | awk 'NR==1')"
-    # If there is no DB Volume group quit with warning. The disk has a LVM2_memebr signature but no volume group. Wipe disk and run again
+    # If there is no DB volume group, quit with a warning. The device has an LVM2_member signature but no volume group. Wipe the device and run again.
     if [ -z $DB_VG_NAME ];then
-        echo "WARNING: Device selected ($DB_DEVICE) has a LVM2_member signature, but no volume group"
-        echo "Wipe disk and run again"
+        echo "WARNING: Device selected ($DB_DEVICE) has an LVM2_member signature, but no volume group"
+        echo "Wipe the device and run again"
         exit 1
     fi
-    # Count how many lv dbs are present, add that to input osds and compare to OSD_LIMIT
+    # Count how many LV DBs are present, add that to the input OSDs, and compare to OSD_LIMIT
    EXISTING_DB_COUNT=$(echo $LVM_JSON_DEVICE | jq -r '.lv_count' | awk 'NR==1')
-    echo "WARNING: device currently has $EXISTING_DB_COUNT db's present"
+    echo "WARNING: device currently has $EXISTING_DB_COUNT DBs present"
     OSD_COUNT=$(bc <<< "${#OSD_LIST[@]}+$EXISTING_DB_COUNT")
-    # set db total device size to the amount of free Bytes in the volume group
+    # Set the total DB device size to the number of free bytes in the volume group
     DB_DEVICE_DISK_SIZE_BYTES=$(echo $LVM_JSON_DEVICE | jq -r '.vg_free' | awk 'NR==1')
 fi

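The signature probe above can also be exercised by hand. A sketch mirroring the script's wipefs check (the device name is hypothetical):

    # Hypothetical manual probe of a device's first signature:
    wipefs /dev/1-1 --json | jq -r '.signatures | .[0].type // empty'
    # Prints nothing for a clean device, LVM2_member for an existing PV
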
@@ -151,16 +159,17 @@ if [ "$FORCE" == "false" ] ; then
     fi
 fi

-# Check if total size of db's to be created will fit on db device
+# Check if the total size of the DBs to be created will fit on the DB device
 if [ "$TOTAL_DB_SIZE_BYTES" -gt "$DB_DEVICE_SIZE_BYTES" ] ; then
     echo "Warning: total size of db will not fit on device $DB_DEVICE"
     exit 1
 fi

-# Check each osd to see if it present on host
-# Check each osd to see if it already has db device
-# Check current bluestore db size and compare to chosen db size
+# Check each OSD to see if it is present on the host
+# Check each OSD to see if it already has a DB device
+# Check the current BlueStore DB size and compare it to the supplied DB size
 # Gather ceph-volume output before entering loop as it takes a while to run
+
 CEPH_VOLUME_JSON=$(ceph-volume lvm list --format json)
 for i in "${!OSD_LIST[@]}"; do
     OSD_ID=${OSD_LIST[i]}
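
ceph-volume lvm list --format json keys its output by OSD id, so one OSD's LV records can be pulled from the pre-gathered JSON without rerunning the slow command. A sketch, assuming that layout ("2" is a hypothetical osd id):

    # Sketch: extract a single OSD's record from the pre-gathered JSON
    echo "$CEPH_VOLUME_JSON" | jq '."2"'
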
@@ -171,7 +180,7 @@ for i in "${!OSD_LIST[@]}"; do
     fi
     DB_CHECK=$(echo $OSD_JSON | jq 'select(.tags["ceph.db_device"])');
     if [ ! -z "$DB_CHECK" ]; then
-        echo "Warning: osd.$OSD_ID already has a db device attached"
+        echo "Warning: osd.$OSD_ID already has a DB device attached"
         exit 1
     fi
     CURRENT_BLOCK_DB_USED_BYTES=$(ceph daemon osd.$OSD_ID perf dump | jq '.bluefs | .db_used_bytes')
@@ -181,9 +190,10 @@ for i in "${!OSD_LIST[@]}"; do
     fi
 done

-# Make sure ceph admin keyring is present hs correct permission
+# Make sure the admin keyring is present with correct permissions
 # Remove "set -e" so we can check ceph status error code
-# Then turn it back on after
+# Then turn it back on
+
 set +e
 ceph status > /dev/null 2>&1 ; rc=$?
 set -e
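
For reference, an equivalent check that avoids toggling errexit, offered only as a sketch of an alternative (not what this commit does): a command tested with "if !" is exempt from set -e.

    # Alternative sketch: 'if !' suppresses errexit for the tested command
    if ! ceph status > /dev/null 2>&1; then
        echo "Cannot reach the cluster; check that the admin keyring is present and readable"
        exit 1
    fi
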
@@ -192,8 +202,7 @@ if [[ "$rc" -ne 0 ]];then
     exit 1
 fi

-# If we got this far then all checked are passed
-# Start migration process
+# If we got this far then all checks passed, so start the migration process

 if [ -z "$LVM_JSON_DEVICE" ] && [ -z "$DB_DEVICE_SIGNATURE" ];then
     pvcreate $DB_DEVICE
@@ -214,33 +223,41 @@ for i in "${!OSD_LIST[@]}"; do
     chown -h ceph:ceph $DB_LV_DEVICE
     chown -R ceph:ceph $(realpath $DB_LV_DEVICE)

-    # Call ceph health check function dont continue unless cluster healthy
+    # Don't continue unless the cluster is healthy
+
     CEPH_STATUS=$(ceph health --format json | jq -r '.status')
     while [ "$CEPH_STATUS" != "HEALTH_OK" ]; do
         echo "Warning: Cluster is not in HEALTH_OK state"
         sleep 2
         CEPH_STATUS=$(ceph health --format json | jq -r '.status')
     done

+    # Don't stop this OSD if doing so would leave data unavailable
+    if ! ceph osd ok-to-stop $OSD_ID > /dev/null 2>&1; then
+        echo "Error: stopping osd.$OSD_ID would result in data unavailability"
+        exit 1
+    fi
+
     echo "Set noout"
     ceph osd set noout
     echo "Stop OSD.$OSD_ID"
     systemctl stop ceph-osd@$OSD_ID
+    # Is this a relic of Filestore and thus superfluous?
     echo "Flush OSD Journal"
     ceph-osd -i $OSD_ID --flush-journal
-    echo "Create new db"
+    echo "Create new DB"
     CEPH_ARGS="--bluestore-block-db-size $BLOCK_DB_SIZE_BYTES" ceph-bluestore-tool bluefs-bdev-new-db --path /var/lib/ceph/osd/ceph-$OSD_ID/ --dev-target $DB_LV_DEVICE
-    echo "Migrate old db to new db"
+    echo "Migrate old DB to new DB"
     ceph-bluestore-tool bluefs-bdev-migrate --path /var/lib/ceph/osd/ceph-$OSD_ID/ --devs-source /var/lib/ceph/osd/ceph-$OSD_ID/block --dev-target /var/lib/ceph/osd/ceph-$OSD_ID/block.db
-    echo "Update LV tags on block and db"
+    echo "Update LV tags on block and DB devices"
     add_lv_tags
     echo "unmount OSD.$OSD_ID"
     umount /var/lib/ceph/osd/ceph-$OSD_ID/
     echo "Activate OSD.$OSD_ID"
     ceph-volume lvm activate $OSD_ID $OSD_FSID
     echo "Unset noout"
     ceph osd unset noout
-    echo "Verify osd is back up before continuing"
+    echo "Verify OSD is up before continuing"
     OSD_STATE=$(ceph osd tree --format json | jq --arg id "$OSD_ID" -r '.nodes[] | select(.id == ($id |tonumber)) | .status')
     echo "OSD_STATE: $OSD_STATE"
     while [ "$OSD_STATE" != "up" ]; do
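
After a migration completes, the DB attachment can be spot-checked with the same tools the script already relies on. A sketch with a hypothetical OSD id:

    # Hypothetical post-migration checks for osd.2:
    ceph-volume lvm list --format json | jq '."2"'              # should now include a db entry
    ceph daemon osd.2 perf dump | jq '.bluefs.db_used_bytes'    # nonzero once the new DB is in use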
