Skip to content

Commit 3d15a6d

Browse files
committed
drbd: online verify: account for "skipped" blocks
In a multi-node scenario, to avoid a potential distributed deadlock, after receiving a P_OV_REQUEST we sometimes have to send back P_RS_CANCEL instead of P_OV_REPLY, effectively skipping verify for those blocks (they are currently "too busy"). If you start verify on a Secondary, this may happen frequently. If you start verify on the Primary, it should happen very rarely. We forgot to account for these skipped blocks, though, so ov_left would then never reach zero. It would then appear that verify was stuck at "almost" 100%, never actually finishing. This change should fix that.
1 parent b9920c3 commit 3d15a6d

File tree

4 files changed

+92
-54
lines changed

4 files changed

+92
-54
lines changed

drbd/drbd_int.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1144,6 +1144,10 @@ struct drbd_peer_device {
11441144
sector_t ov_last_oos_start;
11451145
/* size of out-of-sync range in sectors. */
11461146
sector_t ov_last_oos_size;
1147+
/* Start sector of skipped range (to merge printk reporting). */
1148+
sector_t ov_last_skipped_start;
1149+
/* size of skipped range in sectors. */
1150+
sector_t ov_last_skipped_size;
11471151
int c_sync_rate; /* current resync rate after syncer throttle magic */
11481152
struct fifo_buffer *rs_plan_s; /* correction values of resync planer (RCU, connection->conn_update) */
11491153
atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
@@ -1152,6 +1156,7 @@ struct drbd_peer_device {
11521156
* on the lower level device when we last looked. */
11531157
int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
11541158
unsigned long ov_left; /* in bits */
1159+
unsigned long ov_skipped; /* in bits */
11551160

11561161
u64 current_uuid;
11571162
u64 bitmap_uuids[DRBD_PEERS_MAX];
@@ -1850,6 +1855,8 @@ extern void drbd_start_resync(struct drbd_peer_device *, enum drbd_repl_state);
18501855
extern void resume_next_sg(struct drbd_device *device);
18511856
extern void suspend_other_sg(struct drbd_device *device);
18521857
extern int drbd_resync_finished(struct drbd_peer_device *, enum drbd_disk_state);
1858+
extern void verify_progress(struct drbd_peer_device *peer_device,
1859+
const sector_t sector, const unsigned int size);
18531860
/* maybe rather drbd_main.c ? */
18541861
extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent);
18551862
extern void drbd_md_put_buffer(struct drbd_device *device);
@@ -1873,6 +1880,15 @@ static inline void ov_out_of_sync_print(struct drbd_peer_device *peer_device)
18731880
peer_device->ov_last_oos_size = 0;
18741881
}
18751882

1883+
static inline void ov_skipped_print(struct drbd_peer_device *peer_device)
1884+
{
1885+
if (peer_device->ov_last_skipped_size) {
1886+
drbd_info(peer_device, "Skipped verify, too busy: start=%llu, size=%lu (sectors)\n",
1887+
(unsigned long long)peer_device->ov_last_skipped_start,
1888+
(unsigned long)peer_device->ov_last_skipped_size);
1889+
}
1890+
peer_device->ov_last_skipped_size = 0;
1891+
}
18761892

18771893
extern void drbd_csum_bio(struct crypto_ahash *, struct bio *, void *);
18781894
extern void drbd_csum_pages(struct crypto_ahash *, struct page *, void *);
@@ -1882,7 +1898,6 @@ extern int w_e_end_rsdata_req(struct drbd_work *, int);
18821898
extern int w_e_end_csum_rs_req(struct drbd_work *, int);
18831899
extern int w_e_end_ov_reply(struct drbd_work *, int);
18841900
extern int w_e_end_ov_req(struct drbd_work *, int);
1885-
extern int w_ov_finished(struct drbd_work *, int);
18861901
extern int w_resync_timer(struct drbd_work *, int);
18871902
extern int w_send_dblock(struct drbd_work *, int);
18881903
extern int w_send_read_req(struct drbd_work *, int);
@@ -2492,6 +2507,13 @@ static inline bool is_sync_state(struct drbd_peer_device *peer_device,
24922507
is_sync_target_state(peer_device, which);
24932508
}
24942509

2510+
static inline bool is_verify_state(struct drbd_peer_device *peer_device,
2511+
enum which_state which)
2512+
{
2513+
enum drbd_repl_state repl_state = peer_device->repl_state[which];
2514+
return repl_state == L_VERIFY_S || repl_state == L_VERIFY_T;
2515+
}
2516+
24952517
/**
24962518
* get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev
24972519
* @_device: DRBD device.

drbd/drbd_receiver.c

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2944,6 +2944,20 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_peer_device *peer_device)
29442944
return false;
29452945
}
29462946

2947+
static void verify_skipped_block(struct drbd_peer_device *peer_device,
2948+
const sector_t sector, const unsigned int size)
2949+
{
2950+
++peer_device->ov_skipped;
2951+
if (peer_device->ov_last_skipped_start + peer_device->ov_last_skipped_size == sector) {
2952+
peer_device->ov_last_skipped_size += size>>9;
2953+
} else {
2954+
ov_skipped_print(peer_device);
2955+
peer_device->ov_last_skipped_start = sector;
2956+
peer_device->ov_last_skipped_size = size>>9;
2957+
}
2958+
verify_progress(peer_device, sector, size);
2959+
}
2960+
29472961
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
29482962
{
29492963
struct drbd_peer_device *peer_device;
@@ -2985,10 +2999,11 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
29852999
case P_DATA_REQUEST:
29863000
drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
29873001
break;
3002+
case P_OV_REQUEST:
3003+
verify_skipped_block(peer_device, sector, size);
29883004
case P_RS_THIN_REQ:
29893005
case P_RS_DATA_REQUEST:
29903006
case P_CSUM_RS_REQUEST:
2991-
case P_OV_REQUEST:
29923007
drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
29933008
break;
29943009
case P_OV_REPLY:
@@ -3084,6 +3099,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
30843099
peer_device->ov_start_sector = sector;
30853100
peer_device->ov_position = sector;
30863101
peer_device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
3102+
peer_device->ov_skipped = 0;
30873103
peer_device->rs_total = peer_device->ov_left;
30883104
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
30893105
peer_device->rs_mark_left[i] = peer_device->ov_left;
@@ -3141,6 +3157,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
31413157
Instruct the SyncSource to retry */
31423158
err = drbd_try_rs_begin_io(peer_device, sector, false);
31433159
if (err) {
3160+
if (pi->cmd == P_OV_REQUEST)
3161+
verify_skipped_block(peer_device, sector, size);
31443162
err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
31453163
/* If err is set, we will drop the connection... */
31463164
goto fail3;
@@ -8320,10 +8338,14 @@ static int got_NegRSDReply(struct drbd_connection *connection, struct packet_inf
83208338
drbd_rs_failed_io(peer_device, sector, size);
83218339
break;
83228340
case P_RS_CANCEL:
8323-
bit = BM_SECT_TO_BIT(sector);
8324-
mutex_lock(&device->bm_resync_fo_mutex);
8325-
device->bm_resync_fo = min(device->bm_resync_fo, bit);
8326-
mutex_unlock(&device->bm_resync_fo_mutex);
8341+
if (peer_device->repl_state[NOW] == L_VERIFY_S) {
8342+
verify_skipped_block(peer_device, sector, size);
8343+
} else {
8344+
bit = BM_SECT_TO_BIT(sector);
8345+
mutex_lock(&device->bm_resync_fo_mutex);
8346+
device->bm_resync_fo = min(device->bm_resync_fo, bit);
8347+
mutex_unlock(&device->bm_resync_fo_mutex);
8348+
}
83278349

83288350
atomic_add(size >> 9, &peer_device->rs_sect_in);
83298351
mod_timer(&peer_device->resync_timer, jiffies + SLEEP_TIME);
@@ -8375,24 +8397,8 @@ static int got_OVResult(struct drbd_connection *connection, struct packet_info *
83758397
drbd_rs_complete_io(peer_device, sector);
83768398
dec_rs_pending(peer_device);
83778399

8378-
--peer_device->ov_left;
8379-
8380-
/* let's advance progress step marks only for every other megabyte */
8381-
if ((peer_device->ov_left & 0x200) == 0x200)
8382-
drbd_advance_rs_marks(peer_device, peer_device->ov_left);
8400+
verify_progress(peer_device, sector, size);
83838401

8384-
if (peer_device->ov_left == 0) {
8385-
struct drbd_peer_device_work *dw = kmalloc(sizeof(*dw), GFP_NOIO);
8386-
if (dw) {
8387-
dw->w.cb = w_ov_finished;
8388-
dw->peer_device = peer_device;
8389-
drbd_queue_work(&connection->sender_work, &dw->w);
8390-
} else {
8391-
drbd_err(device, "kmalloc(dw) failed.");
8392-
ov_out_of_sync_print(peer_device);
8393-
drbd_resync_finished(peer_device, D_MASK);
8394-
}
8395-
}
83968402
put_ldev(device);
83978403
return 0;
83988404
}

drbd/drbd_sender.c

Lines changed: 38 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -873,18 +873,6 @@ static int make_ov_request(struct drbd_peer_device *peer_device, int cancel)
873873
return 1;
874874
}
875875

876-
int w_ov_finished(struct drbd_work *w, int cancel)
877-
{
878-
struct drbd_peer_device_work *dw =
879-
container_of(w, struct drbd_peer_device_work, w);
880-
struct drbd_peer_device *peer_device = dw->peer_device;
881-
kfree(dw);
882-
ov_out_of_sync_print(peer_device);
883-
drbd_resync_finished(peer_device, D_MASK);
884-
885-
return 0;
886-
}
887-
888876
struct resync_finished_work {
889877
struct drbd_peer_device_work pdw;
890878
enum drbd_disk_state new_peer_disk_state;
@@ -1091,17 +1079,22 @@ int drbd_resync_finished(struct drbd_peer_device *peer_device,
10911079
__change_repl_state(peer_device, L_ESTABLISHED);
10921080

10931081
aborted = device->disk_state[NOW] == D_OUTDATED && new_peer_disk_state == D_INCONSISTENT;
1094-
1095-
drbd_info(peer_device, "%s %s (total %lu sec; paused %lu sec; %lu K/sec)\n",
1082+
{
1083+
char tmp[sizeof(" but 01234567890123456789 4k blocks skipped")] = "";
1084+
if (verify_done && peer_device->ov_skipped)
1085+
snprintf(tmp, sizeof(tmp), " but %lu %dk blocks skipped",
1086+
peer_device->ov_skipped, Bit2KB(1));
1087+
drbd_info(peer_device, "%s %s%s (total %lu sec; paused %lu sec; %lu K/sec)\n",
10961088
verify_done ? "Online verify" : "Resync",
1097-
aborted ? "aborted" : "done",
1089+
aborted ? "aborted" : "done", tmp,
10981090
dt + peer_device->rs_paused, peer_device->rs_paused, dbdt);
1091+
}
10991092

11001093
n_oos = drbd_bm_total_weight(peer_device);
11011094

11021095
if (repl_state[NOW] == L_VERIFY_S || repl_state[NOW] == L_VERIFY_T) {
11031096
if (n_oos) {
1104-
drbd_alert(peer_device, "Online verify found %lu %dk block out of sync!\n",
1097+
drbd_alert(peer_device, "Online verify found %lu %dk blocks out of sync!\n",
11051098
n_oos, Bit2KB(1));
11061099
khelper_cmd = "out-of-sync";
11071100
}
@@ -1490,12 +1483,31 @@ void drbd_ov_out_of_sync_found(struct drbd_peer_device *peer_device, sector_t se
14901483
if (peer_device->ov_last_oos_start + peer_device->ov_last_oos_size == sector) {
14911484
peer_device->ov_last_oos_size += size>>9;
14921485
} else {
1486+
ov_out_of_sync_print(peer_device);
14931487
peer_device->ov_last_oos_start = sector;
14941488
peer_device->ov_last_oos_size = size>>9;
14951489
}
14961490
drbd_set_out_of_sync(peer_device, sector, size);
14971491
}
14981492

1493+
void verify_progress(struct drbd_peer_device *peer_device,
1494+
const sector_t sector, const unsigned int size)
1495+
{
1496+
bool stop_sector_reached =
1497+
(peer_device->repl_state[NOW] == L_VERIFY_S) &&
1498+
verify_can_do_stop_sector(peer_device) &&
1499+
(sector + (size>>9)) >= peer_device->ov_stop_sector;
1500+
1501+
--peer_device->ov_left;
1502+
1503+
/* let's advance progress step marks only for every other megabyte */
1504+
if ((peer_device->ov_left & 0x1ff) == 0)
1505+
drbd_advance_rs_marks(peer_device, peer_device->ov_left);
1506+
1507+
if (peer_device->ov_left == 0 || stop_sector_reached)
1508+
drbd_peer_device_post_work(peer_device, RS_DONE);
1509+
}
1510+
14991511
int w_e_end_ov_reply(struct drbd_work *w, int cancel)
15001512
{
15011513
struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
@@ -1507,7 +1519,6 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
15071519
unsigned int size = peer_req->i.size;
15081520
int digest_size;
15091521
int err, eq = 0;
1510-
bool stop_sector_reached = false;
15111522

15121523
if (unlikely(cancel)) {
15131524
drbd_free_peer_req(peer_req);
@@ -1552,19 +1563,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
15521563

15531564
dec_unacked(peer_device);
15541565

1555-
--peer_device->ov_left;
1556-
1557-
/* let's advance progress step marks only for every other megabyte */
1558-
if ((peer_device->ov_left & 0x200) == 0x200)
1559-
drbd_advance_rs_marks(peer_device, peer_device->ov_left);
1560-
1561-
stop_sector_reached = verify_can_do_stop_sector(peer_device) &&
1562-
(sector + (size>>9)) >= peer_device->ov_stop_sector;
1563-
1564-
if (peer_device->ov_left == 0 || stop_sector_reached) {
1565-
ov_out_of_sync_print(peer_device);
1566-
drbd_resync_finished(peer_device, D_MASK);
1567-
}
1566+
verify_progress(peer_device, sector, size);
15681567

15691568
return err;
15701569
}
@@ -2085,7 +2084,15 @@ static void update_on_disk_bitmap(struct drbd_peer_device *peer_device, bool res
20852084

20862085
drbd_bm_write_lazy(device, 0);
20872086

2088-
if (resync_done && is_sync_state(peer_device, NOW))
2087+
if (resync_done) {
2088+
if (is_verify_state(peer_device, NOW)) {
2089+
ov_out_of_sync_print(peer_device);
2090+
ov_skipped_print(peer_device);
2091+
} else
2092+
resync_done = is_sync_state(peer_device, NOW);
2093+
}
2094+
2095+
if (resync_done)
20892096
drbd_resync_finished(peer_device, D_MASK);
20902097

20912098
/* update timestamp, in case it took a while to write out stuff */

drbd/drbd_state.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2037,6 +2037,7 @@ static void set_ov_position(struct drbd_peer_device *peer_device,
20372037
peer_device->ov_position = peer_device->ov_start_sector;
20382038
}
20392039
peer_device->ov_left = peer_device->rs_total;
2040+
peer_device->ov_skipped = 0;
20402041
}
20412042

20422043
static void queue_after_state_change_work(struct drbd_resource *resource,
@@ -2249,6 +2250,8 @@ static void finish_state_change(struct drbd_resource *resource, struct completio
22492250
peer_device->rs_last_sect_ev = 0;
22502251
peer_device->ov_last_oos_size = 0;
22512252
peer_device->ov_last_oos_start = 0;
2253+
peer_device->ov_last_skipped_size = 0;
2254+
peer_device->ov_last_skipped_start = 0;
22522255

22532256
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
22542257
peer_device->rs_mark_left[i] = peer_device->ov_left;

0 commit comments

Comments
 (0)