11#! /bin/bash
22# Brett Kelly Oct 2021
3+ # Anthony D'Atri 2025-04-15
34# 45Drives
4- # Version 1.3 stable
5+ # Version 1.4 stable
56
67usage () { # Help
78cat << EOF
89 Usage:
910 [-b] Block DB size. Required. Allowed suffixes K,M,G,T
10- [-d] Device to use as db. Required. Aliased Device name should be used /dev/X-Y
11- [-f] Bypass osd per db warning
12- [-o] OSDs to add db to. Required. Comma separated list of osd.id. <0,1,2,3>
11+ [-d] Device to use for DB+WAL. Required. Aliased Device name should be used /dev/X-Y
12+ [-f] Bypass OSD per DB warning
13+ [-o] OSDs to which to add DB+WAL. Required. Comma separated list of osd.id. <0,1,2,3>
14+ [-r] Number of OSDs to share a given WAL+DB offload device, default is 5, which is
15+ appropriate for SAS/SATA SSD offload devices. A value of 10 is usually reasonable
16+ for NVMe offload devices, but note that this number of OSDs will fail when the
17+ offload device fails.
1318 [-h] Displays this message
1419EOF
1520 exit 0
@@ -36,25 +41,25 @@ add_lv_tags(){
3641 lvchange --addtag " ceph.type=db" $DB_LV_DEVICE
3742}
3843
39- check_dependancies (){
40- for i in " ${! SCRIPT_DEPENDANCIES [@]} " ; do
41- if ! command -v ${SCRIPT_DEPENDANCIES [i]} > /dev/null 2>&1 ; then
42- echo " cli utility: ${SCRIPT_DEPENDANCIES [i]} is not installed"
43- echo " jq, and bc are required"
# Verify that every required external CLI utility is installed.
# Globals:  SCRIPT_DEPENDENCIES (read) - array of required utility names
# Outputs:  error message to stdout when a utility is missing
# Returns:  0 when all utilities are present; exits 1 otherwise
check_dependencies (){
    # Iterate over the values directly; quote the expansion so an entry
    # containing whitespace cannot word-split.
    for dep in "${SCRIPT_DEPENDENCIES[@]}"; do
        if ! command -v "$dep" > /dev/null 2>&1 ; then
            echo "The required utility: $dep is not installed"
            echo "The jq and bc utilities are required"
            exit 1
        fi
    done
}
4853
49- # if encountering any error quit , so to not make a mess
# Quit if we encounter any error, so as not to make anything worse
5055set -e
5156
52- SCRIPT_DEPENDANCIES =(bc jq)
# External utilities this script shells out to (verified by check_dependencies)
SCRIPT_DEPENDENCIES=(bc jq)
# Set to "true" by the -f flag to bypass the OSDs-per-DB-device warning
FORCE="false"
# LVM physical extent size in bytes (4 MiB)
PHYSICAL_EXTENT_SIZE_BYTES=4194304
# Default number of OSDs allowed to share one DB device; overridable with -r
OSD_PER_DB_LIMIT=5
5661
57- while getopts ' b:fo:d:h' OPTION; do
62+ while getopts ' b:fo:d:h:r ' OPTION; do
5863 case ${OPTION} in
5964 b)
6065 BLOCK_DB_SIZE=${OPTARG}
@@ -74,6 +79,11 @@ while getopts 'b:fo:d:h' OPTION; do
7479 OSD_LIST_=${OPTARG}
7580 IFS=' ,' read -r -a OSD_LIST <<< " $OSD_LIST_"
7681 ;;
82+ r)
83+ OSD_PER_DB_LIMIT=${OPTARG}
84+ case $OSD_PER_DB_LIMIT in
85+ ' ' |* [!0-9]* ) echo " OSDs per DB device ratio must be an integer" ; exit 1 ;;
86+ esac
7787 h)
7888 usage
7989 ;;
@@ -86,10 +96,8 @@ if [ -z $OSD_LIST ] || [ -z $DB_DEVICE ] || [ -z $BLOCK_DB_SIZE_BYTES ]; then
8696 exit 1
8797fi
8898
89- # If the db device given is a linux sd device then warn if you want to continue
90-
91- # Check cli depandancies
92- check_dependancies
# Check CLI dependencies
100+ check_dependencies
93101
94102BLOCK_DB_SIZE_EXTENTS=$( bc <<< " $BLOCK_DB_SIZE_BYTES/$PHYSICAL_EXTENT_SIZE_BYTES" )
95103OSD_COUNT=" ${# OSD_LIST[@]} "
@@ -101,44 +109,44 @@ DB_DEVICE_SIZE_BYTES=$(blockdev --getsize64 $DB_DEVICE)
101109# check with wipefs that device has LVM data present
102110DB_DEVICE_SIGNATURE=$( wipefs " $DB_DEVICE " --json | jq -r ' .signatures | .[0].type // empty' )
103111# If this is empty the disk is assumed new.
104- # If this is LVM2_member the disk is assumed to already have a db lv present it
# If this is LVM2_member the disk is assumed to already have a DB LV present on it
105113# If anything else the disk is assumed to have something else on it and should be wiped. Quit with warning
106- if [ -z " $LVM_JSON_DEVICE " ] || [ " $DB_DEVICE_SIGNATURE " == " LVM2_member" ]; then
# NOTE(review): $LVM_JSON_DEVICE is not assigned until the pvs query further
# below, so it is always empty at this point and the first test is always
# true — confirm whether this check was meant to run after that query.
if [ -z "$LVM_JSON_DEVICE" ] || [ "$DB_DEVICE_SIGNATURE" == "LVM2_member" ]; then
    # Empty disk or existing LVM member: both acceptable, continue
    :
else
    echo "Device is neither empty nor an LV device. Wipe the device and run again"
    exit 1
fi
112120
113- # Get PVS info for the specific disk we want
121+ # Get PV info for the specific disk we want
114122LVM_JSON=$( pvs --units B --nosuffix -o name,vg_name,lv_name,lv_count,lvsize,vg_free --reportformat json )
115123LVM_JSON_DEVICE=$( echo $LVM_JSON | jq --arg disk " $DB_DEVICE " ' .[] |.[].pv | .[] | select(.pv_name==$disk)' )
116124
117- # Check we are using the correct device name
125+ # Ensure that we are using the correct device
118126# if DB_DEVICE_SIGNATURE is LVM2_member and LVM_JSON_DEVICE is empty, then the wrong disk name was used (sd name instead of alias). Quit with warning
119127if [ " $DB_DEVICE_SIGNATURE " == " LVM2_member" ] && [ -z " $LVM_JSON_DEVICE " ]; then
120- echo " WARNING: device selected ($DB_DEVICE ) has a LVM signature, but could not get LVM info."
121- echo " Wrong disk name was most likely provided, use the device alias name instead of the linux device name"
128+ echo " WARNING: device selected ($DB_DEVICE ) has an LVM signature, but could not get LVM info."
129+ echo " Wrong device name was most likely provided, use the device alias name instead of the Linux device name"
122130 exit 1
123131fi
124132
125- # are we using an exitsing db device or a new device, if LVM_JSON_DEVICE is empty, and DB_DEVICE_SIGNATURE is empty we have a new disk
133+ # Are we using an existing DB device or a new device? if LVM_JSON_DEVICE is empty and DB_DEVICE_SIGNATURE is empty we have an empty device
126134if [ -z " $LVM_JSON_DEVICE " ] && [ -z " $DB_DEVICE_SIGNATURE " ]; then
127135 DB_VG_NAME=" ceph-$( uuidgen) "
128136else
129- # if not how do we get db_VG ? inspect from device given
137+ # If not how do we get db_VG ? Derive from device given
130138 DB_VG_NAME=" $( echo $LVM_JSON_DEVICE | jq -r ' .vg_name' | awk ' NR==1' ) "
131- # If there is no DB Volume group quit with warning. The disk has a LVM2_memebr signature but no volume group. Wipe disk and run again
# If there is no DB volume group quit with warning. The disk has an LVM2_member signature but no volume group. Wipe device and run again.
132140 if [ -z $DB_VG_NAME ]; then
133- echo " WARNING: Device selected ($DB_DEVICE ) has a LVM2_member signature, but no volume group"
134- echo " Wipe disk and run again"
141+ echo " WARNING: Device selected ($DB_DEVICE ) has an LVM2_member signature, but no volume group"
142+ echo " Wipe the device and run again"
135143 exit 1
136144 fi
137- # Count how many lv dbs are present, add that to input osds and compare to OSD_LIMIT
145+ # Count how many LV DBs are present, add that to input OSDs and compare to OSD_LIMIT
138146 EXISTING_DB_COUNT=$( echo $LVM_JSON_DEVICE | jq -r ' .lv_count' | awk ' NR==1' )
139- echo " WARNING: device currently has $EXISTING_DB_COUNT db's present"
147+ echo " WARNING: device currently has $EXISTING_DB_COUNT dbs present"
140148 OSD_COUNT=$( bc <<< " ${#OSD_LIST[@]}+$EXISTING_DB_COUNT" )
141- # set db total device size to the amount of free Bytes in the volume group
149+ # set DB total device size to the amount of free Bytes in the volume group
142150 DB_DEVICE_DISK_SIZE_BYTES=$( echo $LVM_JSON_DEVICE | jq -r ' .vg_free' | awk ' NR==1' )
143151fi
144152
@@ -151,16 +159,17 @@ if [ "$FORCE" == "false" ] ; then
151159 fi
152160fi
153161
154- # Check if total size of db's to be created will fit on db device
162+ # Check if total size of DBs to be created will fit on DB device
155163if [ " $TOTAL_DB_SIZE_BYTES " -gt " $DB_DEVICE_SIZE_BYTES " ] ; then
156164 echo " Warning: total size of db will not fit on device $DB_DEVICE "
157165 exit 1
158166fi
159167
160- # Check each osd to see if it present on host
161- # Check each osd to see if it already has db device
162- # Check current bluestore db size and compare to chosen db size
168+ # Check each OSD to see if it present on host
169+ # Check each OSD to see if it already has a DB device
170+ # Check current BlueStore DB size and compare to supplied DB size
163171# Gather ceph-volume output before entering loop as it takes a while to run
172+
164173CEPH_VOLUME_JSON=$( ceph-volume lvm list --format json)
165174for i in " ${! OSD_LIST[@]} " ; do
166175 OSD_ID=${OSD_LIST[i]}
@@ -171,7 +180,7 @@ for i in "${!OSD_LIST[@]}"; do
171180 fi
172181 DB_CHECK=$( echo $OSD_JSON | jq ' select(.tags["ceph.db_device"])' ) ;
173182 if [ ! -z " $DB_CHECK " ]; then
174- echo " Warning: osd.$OSD_ID already has a db device attached"
183+ echo " Warning: osd.$OSD_ID already has a DB device attached"
175184 exit 1
176185 fi
177186 CURRENT_BLOCK_DB_USED_BYTES=$( ceph daemon osd.$OSD_ID perf dump | jq ' .bluefs | .db_used_bytes' )
@@ -181,9 +190,10 @@ for i in "${!OSD_LIST[@]}"; do
181190 fi
182191done
183192
184- # Make sure ceph admin keyring is present hs correct permission
193+ # Make sure the admin keyring is present with correct permissions
185194# Remove "set -e" so we can check ceph status error code
186- # Then turn it back on after
195+ # Then turn it back on
196+
187197set +e
188198ceph status > /dev/null 2>&1 ; rc=$?
189199set -e
@@ -192,8 +202,7 @@ if [[ "$rc" -ne 0 ]];then
192202 exit 1
193203fi
194204
195- # If we got this far then all checked are passed
196- # Start migration process
205+ # If we got this far then all checks passed, so start the migration process
197206
198207if [ -z " $LVM_JSON_DEVICE " ] && [ -z " $DB_DEVICE_SIGNATURE " ]; then
199208 pvcreate $DB_DEVICE
@@ -214,33 +223,41 @@ for i in "${!OSD_LIST[@]}"; do
214223 chown -h ceph:ceph $DB_LV_DEVICE
215224 chown -R ceph:ceph $( realpath $DB_LV_DEVICE )
216225
217- # Call ceph health check function dont continue unless cluster healthy
226+ # Don't continue unless the cluster is healthy
227+
218228 CEPH_STATUS=$( ceph health --format json | jq -r ' .status' )
219229 while [ " $CEPH_STATUS " != " HEALTH_OK" ]; do
220230 echo " Warning: Cluster is not in HEALTH_OK state"
221231 sleep 2
222232 CEPH_STATUS=$( ceph health --format json | jq -r ' .status' )
223233 done
224234
235+ OK_TO_STOP=$( ceph osd ok-to-stop $OSD_ID )
236+ if [ $OK_TOP_STOP -ne 0 ];
237+ echo " Error: stopping osd.$OSD_ID would result in data unavailability"
238+ exit 1
239+ fi
240+
225241 echo " Set noout"
226242 ceph osd set noout
227243 echo " Stop OSD.$OSD_ID "
228244 systemctl stop ceph-osd@$OSD_ID
245+ # Is this a relic of Filestore and thus superfluous?
229246 echo " Flush OSD Journal"
230247 ceph-osd -i $OSD_ID --flush-journal
231- echo " Create new db "
248+ echo " Create new DB "
232249 CEPH_ARGS=" --bluestore-block-db-size $BLOCK_DB_SIZE_BYTES " ceph-bluestore-tool bluefs-bdev-new-db --path /var/lib/ceph/osd/ceph-$OSD_ID / --dev-target $DB_LV_DEVICE
233- echo " Migrate old db to new db "
250+ echo " Migrate old DB to new DB "
234251 ceph-bluestore-tool bluefs-bdev-migrate --path /var/lib/ceph/osd/ceph-$OSD_ID / --devs-source /var/lib/ceph/osd/ceph-$OSD_ID /block --dev-target /var/lib/ceph/osd/ceph-$OSD_ID /block.db
235- echo " Update LV tags on block and db "
252+ echo " Update LV tags on block and DB devices "
236253 add_lv_tags
237254 echo " unmount OSD.$OSD_ID "
238255 umount /var/lib/ceph/osd/ceph-$OSD_ID /
239256 echo " Activate OSD.$OSD_ID "
240257 ceph-volume lvm activate $OSD_ID $OSD_FSID
241258 echo " Unset noout"
242259 ceph osd unset noout
243- echo " Verify osd is back up before continuing"
260+ echo " Verify OSD is up before continuing"
244261 OSD_STATE=$( ceph osd tree --format json | jq --arg id " $OSD_ID " -r ' .nodes[] | select(.id == ($id |tonumber)) | .status' )
245262 echo " OSD_STATE: $OSD_STATE "
246263 while [ " $OSD_STATE " != " up" ]; do
0 commit comments