Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 119 additions & 40 deletions misc/mdcheck
Original file line number Diff line number Diff line change
Expand Up @@ -21,49 +21,128 @@
#
# It supports a 'time budget' such that any incomplete 'check'
# will be checkpointed when that time has expired.
# A subsequent invocation can allow the 'check' to continue.
# A subsequent invocation will allow the 'check' to continue.
#
# Arrays are identified by UUID and the 'sync_completed' value is stored
# in /var/lib/mdcheck/MD_UUID_$UUID. When the script has finished checking
# an array, it creates a file /var/lib/mdcheck/Checked_$UUID.
#
# Modes are:
# --continue Don't start new checks, only continue previously started ones
# for which MD_UUID_$UUID already exists
# --maybe-start Like --continue, but also start new checks for arrays for which
# Checked_$UUID does not exist.
# --restart Enable restarting checks that were previously finished
# (remove all Checked_* files) and exit immediately.
# This mode doesn't start any array checks.
# --force-start A new check from 0 will be started on all arrays, even those
# that hadn't finished before.
#
# With none of these options given, the mode defaults to --force-start
# for backward compatibility reasons.
#
# Options are:
# --continue Don't start new checks, only continue old ones.
# --duration This is passed to "date --date=$duration" to find out
# when to finish
#
# To support '--continue', arrays are identified by UUID and the 'sync_completed'
# value is stored in /var/lib/mdcheck/$UUID

# convert a /dev/md name into /sys/.../md equivalent
sysname() {
set `ls -lLd $1`
maj=${5%,}
min=$6
readlink -f /sys/dev/block/$maj:$min


# Emit one log message.
#
# Arguments: $@ - the words of the message; they are joined with single spaces.
# Globals:   INVOCATION_ID (read) - when non-empty the script is assumed to be
#            running under systemd, so the message is simply written to stderr
#            (and thus the journal). Otherwise the message is handed to
#            logger(1) with priority daemon.info and an "mdcheck: " prefix.
log() {
    if [[ "${INVOCATION_ID-}" ]]; then
        # printf instead of echo: echo would treat a message whose first word
        # is -n, -e or -E as an option and silently drop or mangle it.
        # IFS is pinned locally so "$*" joins with a single space, exactly
        # like echo "$@" did.
        local IFS=' '
        printf '%s\n' "$*" >&2
    else
        logger -p daemon.info "mdcheck: $*"
    fi
}

# Map a sysfs block-device directory to its /dev node.
#
# Arguments: $1 - sysfs directory that is expected to contain a "uevent" file
# Outputs:   "/dev/<DEVNAME>" on stdout, without a trailing newline
# Returns:   1 (printing nothing) when the uevent file is missing, defines no
#            DEVNAME, or the resulting /dev path is not a block device
devname() {
    local uevent="$1/uevent" name=

    if [[ -f "$uevent" ]]; then
        # The uevent file is sourced inside the command substitution's
        # subshell, so none of its variables leak into this shell.
        name=$(. "$uevent" && echo -n "$DEVNAME")
    fi

    if [[ -z "$name" || ! -b "/dev/$name" ]]; then
        return 1
    fi

    echo -n "/dev/$name"
}

# Print the command-line help text to stderr.
# Takes no arguments and does not exit; callers pick the exit status.
usage() {
    # One printf argument per output line; the empty argument is the blank
    # line that separates the synopsis from the mode list.
    printf '%s\n' \
        'Usage: mdcheck [mode] [options]' \
        '' \
        'Mode:' \
        '--help print this help' \
        '--continue only continue previously started checks' \
        '--maybe-start continue, and start check on arrays that have not been checked' \
        '--restart re-enable checking previously finished arrays' \
        '--force-start start checking all arrays from zero (default)' \
        'Options:' \
        '--duration time-offset (must be understood by "date --date")' >&2
}

args=$(getopt -o hcd: -l help,continue,duration: -n mdcheck -- "$@")
# Record the operating mode selected on the command line.
#
# Arguments: $1 - the mode option to store
# Globals:   MODE (read and written)
# If MODE is already non-empty, a second mode option was given: an error and
# the usage text are printed to stderr and the script exits with status 1.
set_mode() {
    if [[ -n "$MODE" ]]; then
        # The doubled newline keeps a blank line between the error and the
        # usage text that follows.
        printf '%s\n\n' \
            'ERROR: only one of --continue, --maybe-start, --restart, or --force-start may be set' >&2
        usage
        exit 1
    fi
    MODE=$1
}

args=$(getopt -o hcmrfd: -l help,continue,maybe-start,restart,force-start,duration: -n mdcheck -- "$@")
rv=$?
if [ $rv -ne 0 ]; then exit $rv; fi
if [ $rv -ne 0 ]; then
usage
exit $rv
fi

eval set -- $args

cont=
MODE=
endtime=
while [ " $1" != " --" ]
do
case $1 in
--help )
echo >&2 'Usage: mdcheck [--continue] [--duration time-offset]'
echo >&2 ' time-offset must be understood by "date --date"'
usage
exit 0
;;
--continue ) cont=yes ;;
--duration ) shift; dur=$1
--continue|--maybe-start|--restart|--force-start)
set_mode "$1"
;;
--duration )
shift; dur=$1
endtime=$(date --date "$dur" "+%s")
;;
esac
shift
done
shift

[[ $# -eq 0 ]] || {
usage
exit 1
}

[[ "$MODE" ]] || {
echo >&2 'mdcheck: WARNING: no mode specified, defaulting to --force-start'
echo >&2 '=== Type ctrl-c within 5 seconds if this is not intended'
sleep 5
set_mode --force-start
echo >&2 'mdcheck is running in --force-start mode.'
}

case $MODE in
--restart)
log 'Re-enabling array checks for previously finished arrays'
rm -f /var/lib/mdcheck/Checked_*
exit 0
;;
--force-start)
log 'Forcing array check from 0 for all arrays'
rm -f /var/lib/mdcheck/Checked_* /var/lib/mdcheck/MD_UUID_*
;;
esac

# We need a temp file occasionally...
tmp=/var/lib/mdcheck/.md-check-$$
cnt=0
Expand All @@ -87,7 +166,7 @@ cleanup() {
fi
echo idle > $sys/md/sync_action
cat $sys/md/sync_min > $fl
logger -p daemon.info pause checking $dev at `cat $fl`
log pause checking $dev at `cat $fl`
done
rm -f "$tmp"
}
Expand All @@ -100,42 +179,41 @@ mkdir -p /var/lib/mdcheck
find /var/lib/mdcheck -name "MD_UUID*" -type f -mtime +180 -exec rm {} \;

# Now look at each md device.
for dev in /dev/md?*
for sync_act in /sys/block/*/md/sync_action
do
[ -e "$dev" ] || continue
sys=`sysname $dev`
if [ ! -f "$sys/md/sync_action" ]
then # cannot check this array
continue
fi
if [ "`cat $sys/md/sync_action`" != 'idle' ]
[ -e "$sync_act" ] || continue
if [ "`cat $sync_act`" != 'idle' ]
then # This array is busy
continue
fi

sys=${sync_act%/md/*}
dev=$(devname "$sys") || continue
mdadm --detail --export "$dev" | grep '^MD_UUID=' > $tmp || continue
source $tmp
[[ "$MD_UUID" ]] || continue

fl="/var/lib/mdcheck/MD_UUID_$MD_UUID"
if [ -z "$cont" ]
then
checked="${fl/MD_UUID_/Checked_}"
if [[ -f "$fl" ]]; then
[[ ! -f "$checked" ]] || {
log "WARNING: $checked exists, continuing anyway"
}
start=`cat "$fl"`
elif [[ ! -f "$checked" && "$MODE" != --continue ]]; then
start=0
logger -p daemon.info mdcheck start checking $dev
elif [ -z "$MD_UUID" -o ! -f "$fl" ]
then
# Nothing to continue here
else # nothing to do
continue
else
start=`cat "$fl"`
logger -p daemon.info mdcheck continue checking $dev from $start
fi

cnt=$[cnt+1]
: "$((cnt+=1))"
eval MD_${cnt}_fl=\$fl
eval MD_${cnt}_sys=\$sys
eval MD_${cnt}_dev=\$dev
echo $start > $fl
echo $start > $sys/md/sync_min
echo check > $sys/md/sync_action
log checking $dev from $start
done

if [ -z "$endtime" ]
Expand All @@ -156,9 +234,10 @@ do

if [ "`cat $sys/md/sync_action`" != 'check' ]
then
logger -p daemon.info mdcheck finished checking $dev
log finished checking $dev
eval MD_${i}_fl=
rm -f $fl
rm -f "$fl"
touch "${fl/MD_UUID_/Checked_}"
continue;
fi
read a rest < $sys/md/sync_completed
Expand Down
5 changes: 3 additions & 2 deletions systemd/mdcheck_continue.service
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@

[Unit]
Description=MD array scrubbing - continuation
ConditionPathExistsGlob=/var/lib/mdcheck/MD_UUID_*
Documentation=man:mdadm(8)

[Service]
Type=simple
Environment="MDADM_CHECK_DURATION=6 hours"
ExecStart=/usr/share/mdadm/mdcheck --continue --duration ${MDADM_CHECK_DURATION}
# mdcheck --maybe-start continues previously started checks, and starts from
# zero for arrays without a "Checked_$UUID" marker in /var/lib/mdcheck
ExecStart=/usr/share/mdadm/mdcheck --maybe-start --duration ${MDADM_CHECK_DURATION}
3 changes: 1 addition & 2 deletions systemd/mdcheck_start.service
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,4 @@ Documentation=man:mdadm(8)

[Service]
Type=simple
Environment="MDADM_CHECK_DURATION=6 hours"
ExecStart=/usr/share/mdadm/mdcheck --duration ${MDADM_CHECK_DURATION}
ExecStart=/usr/share/mdadm/mdcheck --restart
2 changes: 1 addition & 1 deletion systemd/mdcheck_start.timer
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
Description=MD array scrubbing

[Timer]
OnCalendar=Sun *-*-1..7 1:05:00
OnCalendar=Sun *-*-1..7 0:45:00

[Install]
WantedBy= mdmonitor.service
Expand Down
Loading