11#! /bin/bash
22# Brett Kelly Oct 2021
3+ # Anthony D'Atri 2025-04-15
34# 45Drives
4- # Version 1.3 stable
5+ # Version 1.4 stable
56
67usage () { # Help
78cat << EOF
89 Usage:
910 [-b] Block DB size. Required. Allowed suffixes K,M,G,T
10- [-d] Device to use as db. Required. Aliased Device name should be used /dev/X-Y
11- [-f] Bypass osd per db warning
12- [-o] OSDs to add db to. Required. Comma separated list of osd.id. <0,1,2,3>
11+ [-d] Device to use for DB+WAL. Required. Aliased Device name should be used /dev/X-Y
12+ [-f] Bypass OSD per DB warning
13+ [-o] OSDs to which to add DB+WAL. Required. Comma separated list of osd.id. <0,1,2,3>
14+ [-r] Number of OSDs to share a given WAL+DB offload device, default is 5, which is
15+ appropriate for SAS/SATA SSD offload devices. A value of 10 is usually reasonable
16+ for NVMe offload devices, but note that this number of OSDs will fail when the
17+ offload device fails.
1318 [-h] Displays this message
1419EOF
1520 exit 0
@@ -36,25 +41,25 @@ add_lv_tags(){
3641 lvchange --addtag " ceph.type=db" $DB_LV_DEVICE
3742}
3843
39- check_dependancies (){
40- for i in " ${! SCRIPT_DEPENDANCIES [@]} " ; do
41- if ! command -v ${SCRIPT_DEPENDANCIES [i]} > /dev/null 2>&1 ; then
42- echo " cli utility: ${SCRIPT_DEPENDANCIES [i]} is not installed"
43- echo " jq, and bc are required"
44+ check_dependencies (){
45+ for i in " ${! SCRIPT_DEPENDENCIES [@]} " ; do
46+ if ! command -v ${SCRIPT_DEPENDENCIES [i]} > /dev/null 2>&1 ; then
47+ echo " The required utility: ${SCRIPT_DEPENDENCIES [i]} is not installed"
48+ echo " The jq and bc utilities are required"
4449 exit 1
4550 fi
4651 done
4752}
4853
49- # if encountering any error quit , so to not make a mess
54+ # Quit if we encounter any error, so as not to make anything worse
5055set -e
5156
52- SCRIPT_DEPENDANCIES =(bc jq)
57+ SCRIPT_DEPENDENCIES =(bc jq)
5358FORCE=" false"
5459PHYSICAL_EXTENT_SIZE_BYTES=4194304
5560OSD_PER_DB_LIMIT=5
5661
57- while getopts ' b:fo:d:h ' OPTION; do
62+ while getopts ' b:fo:d:hr: ' OPTION; do
5863 case ${OPTION} in
5964 b)
6065 BLOCK_DB_SIZE=${OPTARG}
@@ -74,6 +79,12 @@ while getopts 'b:fo:d:h' OPTION; do
7479 OSD_LIST_=${OPTARG}
7580 IFS=' ,' read -r -a OSD_LIST <<< " $OSD_LIST_"
7681 ;;
82+ r)
83+ OSD_PER_DB_LIMIT=${OPTARG}
84+ case $OSD_PER_DB_LIMIT in
85+ ' ' |* [!0-9]* ) echo " OSDs per DB device ratio must be an integer" ; exit 1 ;;
86+ esac
87+ ;;
7788 h)
7889 usage
7990 ;;
@@ -86,10 +97,8 @@ if [ -z $OSD_LIST ] || [ -z $DB_DEVICE ] || [ -z $BLOCK_DB_SIZE_BYTES ]; then
8697 exit 1
8798fi
8899
89- # If the db device given is a linux sd device then warn if you want to continue
90-
91- # Check cli depandancies
92- check_dependancies
100+ # Check CLI dependencies
101+ check_dependencies
93102
94103BLOCK_DB_SIZE_EXTENTS=$( bc <<< " $BLOCK_DB_SIZE_BYTES/$PHYSICAL_EXTENT_SIZE_BYTES" )
95104OSD_COUNT=" ${# OSD_LIST[@]} "
@@ -101,44 +110,44 @@ DB_DEVICE_SIZE_BYTES=$(blockdev --getsize64 $DB_DEVICE)
101110# check with wipefs that device has LVM data present
102111DB_DEVICE_SIGNATURE=$( wipefs " $DB_DEVICE " --json | jq -r ' .signatures | .[0].type // empty' )
103112# If this is empty the disk is assumed new.
104- # If this is LVM2_member the disk is assumed to already have a db lv present it
104- # If this is LVM2_member the disk is assumed to already have a DB LV present on it
105114# If anything else the disk is assumed to have something else on it and should be wiped. Quit with warning
106- if [ -z " $LVM_JSON_DEVICE " ] || [ " $DB_DEVICE_SIGNATURE " == " LVM2_member" ]; then
115+ if [ -z " $LVM_JSON_DEVICE " ] || [ " $DB_DEVICE_SIGNATURE " == " LVM2_member" ]; then
107116 :
108117else
109- echo " Disk is not empty nor a LVM device, wipe device first and run again"
118+ echo " Device is neither empty nor an LV device. Wipe the device and run again"
110119 exit 1
111120fi
112121
113- # Get PVS info for the specific disk we want
122+ # Get PV info for the specific disk we want
114123LVM_JSON=$( pvs --units B --nosuffix -o name,vg_name,lv_name,lv_count,lvsize,vg_free --reportformat json )
115124LVM_JSON_DEVICE=$( echo $LVM_JSON | jq --arg disk " $DB_DEVICE " ' .[] |.[].pv | .[] | select(.pv_name==$disk)' )
116125
117- # Check we are using the correct device name
126+ # Ensure that we are using the correct device
118127# if DB_DEVICE_SIGNATURE is LVM2_member and LVM_JSON_DEVICE is empty, then the wrong disk name was used (sd name instead of alias). Quit with warning
119128if [ " $DB_DEVICE_SIGNATURE " == " LVM2_member" ] && [ -z " $LVM_JSON_DEVICE " ]; then
120- echo " WARNING: device selected ($DB_DEVICE ) has a LVM signature, but could not get LVM info."
121- echo " Wrong disk name was most likely provided, use the device alias name instead of the linux device name"
129+ echo " WARNING: device selected ($DB_DEVICE ) has an LVM signature, but could not get LVM info."
130+ echo " Wrong device name was most likely provided, use the device alias name instead of the Linux device name"
122131 exit 1
123132fi
124133
125- # are we using an exitsing db device or a new device, if LVM_JSON_DEVICE is empty, and DB_DEVICE_SIGNATURE is empty we have a new disk
134+ # Are we using an existing DB device or a new device? if LVM_JSON_DEVICE is empty and DB_DEVICE_SIGNATURE is empty we have an empty device
126135if [ -z " $LVM_JSON_DEVICE " ] && [ -z " $DB_DEVICE_SIGNATURE " ]; then
127136 DB_VG_NAME=" ceph-$( uuidgen) "
128137else
129- # if not how do we get db_VG ? inspect from device given
138+ # If not how do we get db_VG ? Derive from device given
130139 DB_VG_NAME=" $( echo $LVM_JSON_DEVICE | jq -r ' .vg_name' | awk ' NR==1' ) "
131- # If there is no DB Volume group quit with warning. The disk has a LVM2_memebr signature but no volume group. Wipe disk and run again
131- # If there is no DB volume group quit with warning. The disk has an LVM2_member signature but no volume group. Wipe device and run again.
132141 if [ -z $DB_VG_NAME ]; then
133- echo " WARNING: Device selected ($DB_DEVICE ) has a LVM2_member signature, but no volume group"
134- echo " Wipe disk and run again"
142+ echo " WARNING: Device selected ($DB_DEVICE ) has an LVM2_member signature, but no volume group"
143+ echo " Wipe the device and run again"
135144 exit 1
136145 fi
137- # Count how many lv dbs are present, add that to input osds and compare to OSD_LIMIT
146+ # Count how many LV DBs are present, add that to input OSDs and compare to OSD_LIMIT
138147 EXISTING_DB_COUNT=$( echo $LVM_JSON_DEVICE | jq -r ' .lv_count' | awk ' NR==1' )
139- echo " WARNING: device currently has $EXISTING_DB_COUNT db's present"
148+ echo " WARNING: device currently has $EXISTING_DB_COUNT dbs present"
140149 OSD_COUNT=$( bc <<< " ${#OSD_LIST[@]}+$EXISTING_DB_COUNT" )
141- # set db total device size to the amount of free Bytes in the volume group
150+ # set DB total device size to the amount of free Bytes in the volume group
142151 DB_DEVICE_DISK_SIZE_BYTES=$( echo $LVM_JSON_DEVICE | jq -r ' .vg_free' | awk ' NR==1' )
143152fi
144153
@@ -151,16 +160,17 @@ if [ "$FORCE" == "false" ] ; then
151160 fi
152161fi
153162
154- # Check if total size of db's to be created will fit on db device
163+ # Check if total size of DBs to be created will fit on DB device
155164if [ " $TOTAL_DB_SIZE_BYTES " -gt " $DB_DEVICE_SIZE_BYTES " ] ; then
156165 echo " Warning: total size of db will not fit on device $DB_DEVICE "
157166 exit 1
158167fi
159168
160- # Check each osd to see if it present on host
161- # Check each osd to see if it already has db device
162- # Check current bluestore db size and compare to chosen db size
169+ # Check each OSD to see if it present on host
170+ # Check each OSD to see if it already has a DB device
171+ # Check current BlueStore DB size and compare to supplied DB size
163172# Gather ceph-volume output before entering loop as it takes a while to run
173+
164174CEPH_VOLUME_JSON=$( ceph-volume lvm list --format json)
165175for i in " ${! OSD_LIST[@]} " ; do
166176 OSD_ID=${OSD_LIST[i]}
@@ -171,7 +181,7 @@ for i in "${!OSD_LIST[@]}"; do
171181 fi
172182 DB_CHECK=$( echo $OSD_JSON | jq ' select(.tags["ceph.db_device"])' ) ;
173183 if [ ! -z " $DB_CHECK " ]; then
174- echo " Warning: osd.$OSD_ID already has a db device attached"
184+ echo " Warning: osd.$OSD_ID already has a DB device attached"
175185 exit 1
176186 fi
177187 CURRENT_BLOCK_DB_USED_BYTES=$( ceph daemon osd.$OSD_ID perf dump | jq ' .bluefs | .db_used_bytes' )
@@ -181,9 +191,10 @@ for i in "${!OSD_LIST[@]}"; do
181191 fi
182192done
183193
184- # Make sure ceph admin keyring is present hs correct permission
194+ # Make sure the admin keyring is present with correct permissions
185195# Remove "set -e" so we can check ceph status error code
186- # Then turn it back on after
196+ # Then turn it back on
197+
187198set +e
188199ceph status > /dev/null 2>&1 ; rc=$?
189200set -e
@@ -192,8 +203,7 @@ if [[ "$rc" -ne 0 ]];then
192203 exit 1
193204fi
194205
195- # If we got this far then all checked are passed
196- # Start migration process
206+ # If we got this far then all checks passed, so start the migration process
197207
198208if [ -z " $LVM_JSON_DEVICE " ] && [ -z " $DB_DEVICE_SIGNATURE " ]; then
199209 pvcreate $DB_DEVICE
@@ -214,33 +224,38 @@ for i in "${!OSD_LIST[@]}"; do
214224 chown -h ceph:ceph $DB_LV_DEVICE
215225 chown -R ceph:ceph $( realpath $DB_LV_DEVICE )
216226
217- # Call ceph health check function dont continue unless cluster healthy
227+ # Don't continue unless the cluster is healthy
228+
218229 CEPH_STATUS=$( ceph health --format json | jq -r ' .status' )
219230 while [ " $CEPH_STATUS " != " HEALTH_OK" ]; do
220231 echo " Warning: Cluster is not in HEALTH_OK state"
221232 sleep 2
222233 CEPH_STATUS=$( ceph health --format json | jq -r ' .status' )
223234 done
224235
236+ OK_TO_STOP=$( ceph osd ok-to-stop $OSD_ID )
237+ if [ $OK_TOP_STOP -ne 0 ];
238+ echo " Error: stopping osd.$OSD_ID would result in data unavailability"
239+ exit 1
240+ fi
241+
225242 echo " Set noout"
226243 ceph osd set noout
227244 echo " Stop OSD.$OSD_ID "
228245 systemctl stop ceph-osd@$OSD_ID
229- echo " Flush OSD Journal"
230- ceph-osd -i $OSD_ID --flush-journal
231- echo " Create new db"
246+ echo " Create new DB"
232247 CEPH_ARGS=" --bluestore-block-db-size $BLOCK_DB_SIZE_BYTES " ceph-bluestore-tool bluefs-bdev-new-db --path /var/lib/ceph/osd/ceph-$OSD_ID / --dev-target $DB_LV_DEVICE
233- echo " Migrate old db to new db "
248+ echo " Migrate old DB to new DB "
234249 ceph-bluestore-tool bluefs-bdev-migrate --path /var/lib/ceph/osd/ceph-$OSD_ID / --devs-source /var/lib/ceph/osd/ceph-$OSD_ID /block --dev-target /var/lib/ceph/osd/ceph-$OSD_ID /block.db
235- echo " Update LV tags on block and db "
250+ echo " Update LV tags on block and DB devices "
236251 add_lv_tags
237252 echo " unmount OSD.$OSD_ID "
238253 umount /var/lib/ceph/osd/ceph-$OSD_ID /
239254 echo " Activate OSD.$OSD_ID "
240255 ceph-volume lvm activate $OSD_ID $OSD_FSID
241256 echo " Unset noout"
242257 ceph osd unset noout
243- echo " Verify osd is back up before continuing"
258+ echo " Verify OSD is up before continuing"
244259 OSD_STATE=$( ceph osd tree --format json | jq --arg id " $OSD_ID " -r ' .nodes[] | select(.id == ($id |tonumber)) | .status' )
245260 echo " OSD_STATE: $OSD_STATE "
246261 while [ " $OSD_STATE " != " up" ]; do
0 commit comments