diff --git a/misc/mdcheck b/misc/mdcheck index 398a1ea6..fcd38e2a 100644 --- a/misc/mdcheck +++ b/misc/mdcheck @@ -21,42 +21,96 @@ # # It supports a 'time budget' such that any incomplete 'check' # will be checkpointed when that time has expired. -# A subsequent invocation can allow the 'check' to continue. +# A subsequent invocation will allow the 'check' to continue. +# +# Arrays are identified by UUID and the 'sync_completed' value is stored +# in /var/lib/mdcheck/MD_UUID_$UUID. When the script has finished checking +# an array, it creates a file /var/lib/mdcheck/Checked_$UUID. +# +# Modes are (each also has a one-letter form: -c, -m, -r, -f): +# --continue Don't start new checks, only continue previously started ones +# for which MD_UUID_$UUID already exists +# --maybe-start Like --continue, but also start new checks for arrays for which +# Checked_$UUID does not exist. +# --restart Enable restarting checks that were previously finished +# (remove all Checked_* files) and exit immediately. +# This mode doesn't start any array checks. +# --force-start A new check from 0 will be started on all arrays, even those +# that hadn't finished before. +# +# With none of these options given, the mode defaults to --force-start +# for backward compatibility reasons. # # Options are: -# --continue Don't start new checks, only continue old ones. # --duration This is passed to "date --date=$duration" to find out # when to finish -# -# To support '--continue', arrays are identified by UUID and the 'sync_completed' -# value is stored in /var/lib/mdcheck/$UUID - -# convert a /dev/md name into /sys/.../md equivalent -sysname() { - set `ls -lLd $1` - maj=${5%,} - min=$6 - readlink -f /sys/dev/block/$maj:$min + + +# If the script is run from systemd, simply write to the journal on stderr. +# Otherwise, use logger. +log() { + if [[ "$INVOCATION_ID" ]]; then + echo "$@" >&2 + else + logger -p daemon.info "mdcheck: $*" + fi +} + +# print the /dev node named by DEVNAME in the uevent file of sysfs directory $1; return 1 if no such block device exists +devname() { + local dev + [[ -f "$1/uevent" ]] && \ + dev=$(. 
"$1/uevent" && echo -n "$DEVNAME") + [[ "$dev" && -b "/dev/$dev" ]] || return 1 + echo -n "/dev/$dev" +} + +usage() { + echo >&2 'Usage: mdcheck [mode] [options] + +Mode: + -h, --help print this help + -c, --continue only continue previously started checks + -m, --maybe-start continue, and start check on arrays that have not been checked + -r, --restart re-enable checking previously finished arrays + -f, --force-start start checking all arrays from zero (default) +Options: + -d, --duration time-offset (must be understood by "date --date")' } -args=$(getopt -o hcd: -l help,continue,duration: -n mdcheck -- "$@") +set_mode() { # record the run mode in MODE; one-letter aliases are stored as their long names + [[ "$MODE" ]] && { + echo >&2 'ERROR: only one of --continue, --maybe-start, --restart, or --force-start may be set +' + usage + exit 1 + } + case $1 in -c) MODE=--continue ;; -m) MODE=--maybe-start ;; -r) MODE=--restart ;; -f) MODE=--force-start ;; *) MODE=$1 ;; esac +} + +args=$(getopt -o hcmrfd: -l help,continue,maybe-start,restart,force-start,duration: -n mdcheck -- "$@") rv=$? -if [ $rv -ne 0 ]; then exit $rv; fi +if [ $rv -ne 0 ]; then + usage + exit $rv +fi eval set -- $args -cont= +MODE= endtime= while [ " $1" != " --" ] do case $1 in --help ) - echo >&2 'Usage: mdcheck [--continue] [--duration time-offset]' - echo >&2 ' time-offset must be understood by "date --date"' + usage exit 0 ;; - --continue ) cont=yes ;; - --duration ) shift; dur=$1 + -h ) usage; exit 0 ;; + -c|--continue|-m|--maybe-start|-r|--restart|-f|--force-start) set_mode "$1" + ;; + -d|--duration ) + shift; dur=$1 endtime=$(date --date "$dur" "+%s") ;; esac @@ -64,6 +118,31 @@ do done shift +[[ $# -eq 0 ]] || { + usage + exit 1 +} + +[[ "$MODE" ]] || { + echo >&2 'mdcheck: WARNING: no mode specified, defaulting to --force-start' + echo >&2 '=== Type ctrl-c within 5 seconds if this is not intended' + sleep 5 + set_mode --force-start + echo >&2 'mdcheck is running in --force-start mode.' 
+} + +case $MODE in + --restart) + log 'Re-enabling array checks for previously finished arrays' + rm -f /var/lib/mdcheck/Checked_* + exit 0 + ;; + --force-start) + log 'Forcing array check from 0 for all arrays' + rm -f /var/lib/mdcheck/Checked_* /var/lib/mdcheck/MD_UUID_* + ;; +esac + # We need a temp file occasionally... tmp=/var/lib/mdcheck/.md-check-$$ cnt=0 @@ -87,7 +166,7 @@ cleanup() { fi echo idle > $sys/md/sync_action cat $sys/md/sync_min > $fl - logger -p daemon.info pause checking $dev at `cat $fl` + log pause checking $dev at `cat $fl` done rm -f "$tmp" } @@ -100,42 +179,41 @@ mkdir -p /var/lib/mdcheck find /var/lib/mdcheck -name "MD_UUID*" -type f -mtime +180 -exec rm {} \; # Now look at each md device. -for dev in /dev/md?* +for sync_act in /sys/block/*/md/sync_action do - [ -e "$dev" ] || continue - sys=`sysname $dev` - if [ ! -f "$sys/md/sync_action" ] - then # cannot check this array - continue - fi - if [ "`cat $sys/md/sync_action`" != 'idle' ] + [ -e "$sync_act" ] || continue + if [ "`cat $sync_act`" != 'idle' ] then # This array is busy continue fi + sys=${sync_act%/md/*} # sysfs directory of the array (path with /md/sync_action removed) + dev=$(devname "$sys") || continue mdadm --detail --export "$dev" | grep '^MD_UUID=' > $tmp || continue source $tmp + [[ "$MD_UUID" ]] || continue + fl="/var/lib/mdcheck/MD_UUID_$MD_UUID" - if [ -z "$cont" ] - then + checked="${fl/MD_UUID_/Checked_}" # marker file created once a check of this array has finished + if [[ -f "$fl" ]]; then # a checkpoint exists: resume from the saved position + [[ ! -f "$checked" ]] || { + log "WARNING: $checked exists, continuing anyway" + } + start=`cat "$fl"` + elif [[ ! -f "$checked" && "$MODE" != --continue ]]; then # not checked yet and new checks are allowed start=0 - logger -p daemon.info mdcheck start checking $dev - elif [ -z "$MD_UUID" -o ! 
-f "$fl" ] - then - # Nothing to continue here + else # nothing to do continue - else - start=`cat "$fl"` - logger -p daemon.info mdcheck continue checking $dev from $start fi - cnt=$[cnt+1] + : "$((cnt+=1))" eval MD_${cnt}_fl=\$fl eval MD_${cnt}_sys=\$sys eval MD_${cnt}_dev=\$dev echo $start > $fl echo $start > $sys/md/sync_min echo check > $sys/md/sync_action + log checking $dev from $start done if [ -z "$endtime" ] @@ -156,9 +234,10 @@ do if [ "`cat $sys/md/sync_action`" != 'check' ] then - logger -p daemon.info mdcheck finished checking $dev + log finished checking $dev eval MD_${i}_fl= - rm -f $fl + rm -f "$fl" + touch "${fl/MD_UUID_/Checked_}" continue; fi read a rest < $sys/md/sync_completed diff --git a/systemd/mdcheck_continue.service b/systemd/mdcheck_continue.service index cd12db85..035fd61f 100644 --- a/systemd/mdcheck_continue.service +++ b/systemd/mdcheck_continue.service @@ -7,10 +7,11 @@ [Unit] Description=MD array scrubbing - continuation -ConditionPathExistsGlob=/var/lib/mdcheck/MD_UUID_* Documentation=man:mdadm(8) [Service] Type=simple Environment="MDADM_CHECK_DURATION=6 hours" -ExecStart=/usr/share/mdadm/mdcheck --continue --duration ${MDADM_CHECK_DURATION} +# mdcheck --maybe-start continues previously started checks, and starts from +# zero for arrays without a "Checked_$UUID" marker in /var/lib/mdcheck +ExecStart=/usr/share/mdadm/mdcheck --maybe-start --duration ${MDADM_CHECK_DURATION} diff --git a/systemd/mdcheck_start.service b/systemd/mdcheck_start.service index 16ba6b67..c7ddd4f6 100644 --- a/systemd/mdcheck_start.service +++ b/systemd/mdcheck_start.service @@ -12,5 +12,4 @@ Documentation=man:mdadm(8) [Service] Type=simple -Environment="MDADM_CHECK_DURATION=6 hours" -ExecStart=/usr/share/mdadm/mdcheck --duration ${MDADM_CHECK_DURATION} +ExecStart=/usr/share/mdadm/mdcheck --restart diff --git a/systemd/mdcheck_start.timer b/systemd/mdcheck_start.timer index 1b8f3f20..8d09b3f6 100644 --- a/systemd/mdcheck_start.timer +++ 
b/systemd/mdcheck_start.timer @@ -9,7 +9,7 @@ Description=MD array scrubbing [Timer] -OnCalendar=Sun *-*-1..7 1:05:00 +OnCalendar=Sun *-*-1..7 0:45:00 [Install] WantedBy= mdmonitor.service