Skip to content
61 changes: 53 additions & 8 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,7 @@ int mddev_init(struct mddev *mddev)
atomic_set(&mddev->openers, 0);
atomic_set(&mddev->sync_seq, 0);
spin_lock_init(&mddev->lock);
spin_lock_init(&mddev->error_handle_lock);
init_waitqueue_head(&mddev->sb_wait);
init_waitqueue_head(&mddev->recovery_wait);
mddev->reshape_position = MaxSector;
Expand Down Expand Up @@ -986,14 +987,9 @@ static void super_written(struct bio *bio)
if (bio->bi_status) {
pr_err("md: %s gets error=%d\n", __func__,
blk_status_to_errno(bio->bi_status));
md_error(mddev, rdev);
if (!test_bit(Faulty, &rdev->flags)
&& (bio->bi_opf & MD_FAILFAST)) {
if (!md_bio_failure_error(mddev, rdev, bio))
set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
set_bit(LastDev, &rdev->flags);
}
} else
clear_bit(LastDev, &rdev->flags);
}

bio_put(bio);

Expand Down Expand Up @@ -8186,7 +8182,7 @@ void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp)
}
EXPORT_SYMBOL(md_unregister_thread);

void md_error(struct mddev *mddev, struct md_rdev *rdev)
void _md_error(struct mddev *mddev, struct md_rdev *rdev)
{
if (!rdev || test_bit(Faulty, &rdev->flags))
return;
Expand All @@ -8211,8 +8207,57 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
queue_work(md_misc_wq, &mddev->event_work);
md_new_event();
}

/**
 * md_error() - report a failed member device, serialized per array.
 * @mddev: affected md device.
 * @rdev: member device to fail.
 *
 * Runs _md_error() with mddev->error_handle_lock held so that concurrent
 * error reports for the same array are handled one at a time.
 *
 * The lock is taken with local interrupts disabled: the same lock is also
 * taken by md_bio_failure_error(), which super_written() calls while
 * handling a failed bio. That path may run in interrupt context, and a
 * plain spin_lock() holder interrupted on the same CPU would deadlock.
 */
void md_error(struct mddev *mddev, struct md_rdev *rdev)
{
	unsigned long flags;

	spin_lock_irqsave(&mddev->error_handle_lock, flags);
	_md_error(mddev, rdev);
	spin_unlock_irqrestore(&mddev->error_handle_lock, flags);
}
EXPORT_SYMBOL(md_error);

/**
 * md_bio_failure_error() - md error handler for MD_FAILFAST bios
 * @mddev: affected md device.
 * @rdev: member device to fail.
 * @bio: bio whose failure triggered the device failure; may be NULL.
 *
 * This is almost the same as md_error(). That is, it is serialized by the
 * same lock as md_error(), marks the rdev as Faulty, and changes the mddev
 * status.
 * However, if all of the following conditions are met, it does nothing,
 * because a failed MD_FAILFAST bio must not stop the array:
 * * the personality is RAID1 or RAID10
 * * @rdev has LastDev set - if rdev becomes Faulty, mddev will stop
 * * @rdev has FailFast set
 * * the failed @bio has MD_FAILFAST set
 *
 * The lock is taken with local interrupts disabled because this function is
 * called from super_written() while handling a failed bio, which may run in
 * interrupt context, while md_error() takes the same lock.
 *
 * Return: true if _md_error() was called, false if not.
 */
bool md_bio_failure_error(struct mddev *mddev, struct md_rdev *rdev, struct bio *bio)
{
	unsigned long flags;
	bool do_md_error = true;

	spin_lock_irqsave(&mddev->error_handle_lock, flags);

	/*
	 * A NULL rdev never matches the skip condition; it is passed on to
	 * _md_error(), which ignores it.
	 */
	if (mddev->pers &&
	    (mddev->pers->head.id == ID_RAID1 ||
	     mddev->pers->head.id == ID_RAID10) &&
	    rdev &&
	    test_bit(LastDev, &rdev->flags) &&
	    test_bit(FailFast, &rdev->flags) &&
	    bio && (bio->bi_opf & MD_FAILFAST))
		do_md_error = false;

	if (do_md_error)
		_md_error(mddev, rdev);
	else
		pr_warn_ratelimited("md: %s: %s didn't do anything for %pg\n",
				    mdname(mddev), __func__, rdev->bdev);

	spin_unlock_irqrestore(&mddev->error_handle_lock, flags);
	return do_md_error;
}
EXPORT_SYMBOL(md_bio_failure_error);

/* seq_file implementation /proc/mdstat */

static void status_unused(struct seq_file *seq)
Expand Down
12 changes: 9 additions & 3 deletions drivers/md/md.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,9 +281,10 @@ enum flag_bits {
* It is expected that no bad block log
* is present.
*/
LastDev, /* Seems to be the last working dev as
* it didn't fail, so don't use FailFast
* any more for metadata
LastDev, /* This is the last working rdev,
* so don't use FailFast any more for
* metadata, and don't mark the rdev
* Faulty when a FailFast bio fails.
*/
CollisionCheck, /*
* check if there is collision between raid1
Expand Down Expand Up @@ -619,6 +620,9 @@ struct mddev {
/* The sequence number for sync thread */
atomic_t sync_seq;

/* Lock for serializing md_error */
spinlock_t error_handle_lock;

bool has_superblocks:1;
bool fail_last_dev:1;
bool serialize_policy:1;
Expand Down Expand Up @@ -879,7 +883,9 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
/*
 * Core error handler without locking of its own; md_error() and
 * md_bio_failure_error() call it with mddev->error_handle_lock held.
 */
void _md_error(struct mddev *mddev, struct md_rdev *rdev);
/* Runs _md_error() under mddev->error_handle_lock. */
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
/*
 * Like md_error(), but may skip failing the rdev for a failed MD_FAILFAST
 * bio; returns true if _md_error() was called, false otherwise.
 */
extern bool md_bio_failure_error(struct mddev *mddev, struct md_rdev *rdev, struct bio *bio);
extern void md_finish_reshape(struct mddev *mddev);
void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
struct bio *bio, sector_t start, sector_t size);
Expand Down
Loading