Skip to content

Commit 5632cca

Browse files
committed
fuse: Add DLM retry workaround for iomap write failures
When the FUSE server returns -EAGAIN during write-back operations (signaled by DLM), the write fails with an IO error. This happens because:

1. Page invalidation holds the DLM lock and needs the folio lock
2. The iomap write path holds the folio lock and calls fuse_iomap_read_folio_range()
3. FUSE gets -EAGAIN from the server (it cannot acquire the DLM lock - that would deadlock)
4. fuse_do_readfolio() converts -EAGAIN to AOP_TRUNCATED_PAGE and unlocks the folio (this prevents the deadlock by releasing the folio lock)
5. However, iomap doesn't understand AOP_TRUNCATED_PAGE and treats it as an error
6. Result: the write fails with an IO error, even though it's just temporary contention

This is a FUSE-only workaround until mainline iomap gains AOP_TRUNCATED_PAGE retry support.

The solution:

1. Stack-allocate retry state in fuse_writeback_write_iter() (called from fuse_cache_write_iter())
2. Register it in the fuse_conn xarray before calling iomap (indexed by task pointer)
3. When fuse_iomap_read_folio_range() sees AOP_TRUNCATED_PAGE:
   - Mark the retry flag in the registered state
   - Convert to -EAGAIN for iomap
4. After iomap returns, check the retry flag
5. If set, retry the entire write operation
6. Remove from the xarray when done (or keep for the next retry iteration)

This allows writes to succeed by retrying after the DLM lock contention clears, rather than failing with an IO error.

Technical flow showing why iov_iter is not advanced on -EAGAIN:

fuse_cache_write_iter() -> fuse_writeback_write_iter()
  total_written = 0
retry:
  iomap_file_buffered_write()
    iomap_write_iter() [write loop]
      iomap_write_begin()
        __iomap_write_begin()
          Need read? → Yes
            read_folio_range()
              FUSE server -EAGAIN?
                Yes → Set retry flag, return -EAGAIN
                No  → Success
        Error   → Break loop [iov_iter NOT advanced]
        Success → Continue
      copy_folio_from_iter_atomic() [Advances iov_iter]
      iomap_write_end()
      Advance iter.pos
      Loop while more data
    Update iocb->ki_pos = iter.pos
    Return bytes written
  if (written > 0) total_written += written
  if (retry_needed) goto retry
  return total_written

Signed-off-by: Bernd Schubert <bernd@bsbernd.com>
1 parent f0cc511 commit 5632cca

3 files changed

Lines changed: 127 additions & 11 deletions

File tree

fs/fuse/file.c

Lines changed: 108 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -960,8 +960,8 @@ static int fuse_do_readfolio(struct file *file, struct folio *folio,
960960
if (res < 0) {
961961
/*
962962
* please refer to Documentation/filesystems/fuse/fuse-AOP_TRUNCATED_PAGE-reason.txt
963-
* why this is necessarry.
964-
* READ can return -EAGAIN from DLM subsystem
963+
* why READ can return -EAGAIN from DLM subsystem.
964+
* XXX find a better DLM specific error code
965965
*/
966966
if (res == -EAGAIN && fm->fc->dlm)
967967
res = AOP_TRUNCATED_PAGE;
@@ -1001,8 +1001,45 @@ static int fuse_iomap_read_folio_range(const struct iomap_iter *iter,
10011001
size_t len)
10021002
{
10031003
struct file *file = iter->private;
1004+
struct inode *inode = file_inode(file);
1005+
struct fuse_conn *fc = get_fuse_conn(inode);
10041006
size_t off = offset_in_folio(folio, pos);
1005-
return fuse_do_readfolio(file, folio, off, len);
1007+
int ret;
1008+
ret = fuse_do_readfolio(file, folio, off, len);
1009+
1010+
/*
1011+
* TEMPORARY WORKAROUND for iomap write deadlock:
1012+
*
1013+
* When FUSE server returns -EAGAIN due to DLM,
1014+
* fuse_do_readfolio() converts it to AOP_TRUNCATED_PAGE and
1015+
* unlocks the folio (per AOP_TRUNCATED_PAGE contract).
1016+
*
1017+
* However, iomap doesn't understand AOP_TRUNCATED_PAGE.
1018+
* We need to:
1019+
* 1. Mark the retry flag (caller stored it in xarray)
1020+
* 2. Convert to -EAGAIN so iomap sees an error
1021+
* 3. Let fuse_cache_write_iter() detect and retry
1022+
*
1023+
* This breaks the ABBA deadlock:
1024+
* - Folio is unlocked (page invalidation can proceed)
1025+
* - Write will be retried at higher level
1026+
*
1027+
* Remove this when mainline iomap gains AOP_TRUNCATED_PAGE support.
1028+
*/
1029+
if (ret == AOP_TRUNCATED_PAGE) {
1030+
struct fuse_dlm_retry *retry;
1031+
unsigned long task_key = (unsigned long)current;
1032+
1033+
retry = xa_load(&fc->dlm_retry_tasks, task_key);
1034+
if (retry) {
1035+
retry->retry_needed = true;
1036+
}
1037+
1038+
/* Convert to -EAGAIN for iomap */
1039+
ret = -EAGAIN;
1040+
}
1041+
1042+
return ret;
10061043
}
10071044

10081045
static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
@@ -1555,6 +1592,69 @@ static const struct iomap_ops fuse_iomap_ops = {
15551592
.iomap_begin = fuse_iomap_begin,
15561593
};
15571594

1595+
static ssize_t fuse_writeback_write_iter(struct kiocb *iocb,
1596+
struct iov_iter *from,
1597+
struct file *file)
1598+
{
1599+
struct fuse_conn *fc = get_fuse_conn(file_inode(file));
1600+
ssize_t written, total_written = 0;
1601+
1602+
/*
1603+
* TEMPORARY WORKAROUND for iomap write deadlock:
1604+
*
1605+
* Stack-allocate retry state and register it before calling
1606+
* iomap. If fuse_iomap_read_folio_range() encounters
1607+
* AOP_TRUNCATED_PAGE, it will mark retry_needed.
1608+
*
1609+
* Stack allocation ensures no memory leaks - the state is
1610+
* valid for the duration of this function call and is
1611+
* automatically cleaned up.
1612+
*/
1613+
struct fuse_dlm_retry retry_state = {
1614+
.retry_needed = false,
1615+
};
1616+
unsigned long task_key = (unsigned long)current;
1617+
int xa_ret;
1618+
1619+
xa_ret = xa_err(xa_store(&fc->dlm_retry_tasks, task_key,
1620+
&retry_state, GFP_KERNEL));
1621+
if (xa_ret)
1622+
return xa_ret;
1623+
1624+
retry:
1625+
/*
1626+
* Use iomap so that we can do granular uptodate reads
1627+
* and granular dirty tracking for large folios.
1628+
*/
1629+
written = iomap_file_buffered_write(iocb, from, &fuse_iomap_ops,
1630+
&fuse_iomap_write_ops, file);
1631+
1632+
if (written > 0)
1633+
total_written += written;
1634+
1635+
/*
1636+
* If DLM lock contention occurred (AOP_TRUNCATED_PAGE),
1637+
* retry the entire write operation.
1638+
*
1639+
* The folio has been unlocked by fuse_do_readfolio(),
1640+
* breaking the ABBA deadlock with page invalidation.
1641+
*
1642+
* Keep the entry in xarray and reuse it for the retry.
1643+
*
1644+
* Remove this when mainline iomap gains AOP_TRUNCATED_PAGE
1645+
* retry support.
1646+
*/
1647+
if (retry_state.retry_needed) {
1648+
retry_state.retry_needed = false;
1649+
goto retry;
1650+
}
1651+
1652+
/* Remove from xarray now that we're done */
1653+
xa_erase(&fc->dlm_retry_tasks, task_key);
1654+
1655+
return written < 0 ? written : total_written;
1656+
}
1657+
15581658
static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
15591659
{
15601660
struct file *file = iocb->ki_filp;
@@ -1621,14 +1721,11 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
16211721
written = direct_write_fallback(iocb, from, written,
16221722
fuse_perform_write(iocb, from));
16231723
} else if (writeback) {
1624-
/*
1625-
* Use iomap so that we can do granular uptodate reads
1626-
* and granular dirty tracking for large folios.
1627-
*/
1628-
written = iomap_file_buffered_write(iocb, from,
1629-
&fuse_iomap_ops,
1630-
&fuse_iomap_write_ops,
1631-
file);
1724+
written = fuse_writeback_write_iter(iocb, from, file);
1725+
if (written < 0) {
1726+
err = written;
1727+
goto out;
1728+
}
16321729
} else {
16331730
written = fuse_perform_write(iocb, from);
16341731
}

fs/fuse/fuse_i.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,17 @@ struct fuse_sync_bucket {
639639
struct rcu_head rcu;
640640
};
641641

642+
/**
643+
* DLM retry tracking for iomap write deadlock workaround.
644+
*
645+
* Temporary workaround until mainline iomap gains AOP_TRUNCATED_PAGE
646+
* retry support. Tracks tasks that need to retry write operations due
647+
* to DLM lock contention (-EAGAIN from FUSE server).
648+
*/
649+
struct fuse_dlm_retry {
650+
/*
 * Set to true by fuse_iomap_read_folio_range() when fuse_do_readfolio()
 * returned AOP_TRUNCATED_PAGE for the task that registered this record.
 * fuse_writeback_write_iter() tests it after each iomap write attempt
 * and clears it before retrying.
 */
bool retry_needed;
651+
};
652+
642653
/**
643654
* A Fuse connection.
644655
*
@@ -1022,6 +1033,12 @@ struct fuse_conn {
10221033
/* The foffset alignment in PAGE */
10231034
unsigned int alignment_pages;
10241035

1036+
/**
1037+
* XArray tracking tasks that need DLM retry.
1038+
* Maps task pointer -> struct fuse_dlm_retry.
1039+
* Temporary workaround for iomap write deadlock.
1040+
*/
1041+
struct xarray dlm_retry_tasks;
10251042
};
10261043

10271044
/*

fs/fuse/inode.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,6 +1056,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
10561056
fc->initialized = 0;
10571057
fc->connected = 1;
10581058
fc->dlm = 1;
1059+
xa_init(&fc->dlm_retry_tasks);
10591060

10601061
/* module option for now */
10611062
fc->compound_open_getattr = enable_compound;
@@ -1109,6 +1110,7 @@ void fuse_conn_put(struct fuse_conn *fc)
11091110
}
11101111
if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
11111112
fuse_backing_files_free(fc);
1113+
xa_destroy(&fc->dlm_retry_tasks);
11121114
call_rcu(&fc->rcu, delayed_release);
11131115
}
11141116
}

0 commit comments

Comments
 (0)