Skip to content

Commit 63a42e1

Browse files
committed
Merge tag 'for-4.20-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
 "Several fixes to recent release (4.19, fixes tagged for stable) and
  other fixes"

* tag 'for-4.20-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  Btrfs: fix missing delayed iputs on unmount
  Btrfs: fix data corruption due to cloning of eof block
  Btrfs: fix infinite loop on inode eviction after deduplication of eof block
  Btrfs: fix deadlock on tree root leaf when finding free extent
  btrfs: avoid link error with CONFIG_NO_AUTO_INLINE
  btrfs: tree-checker: Fix misleading group system information
  Btrfs: fix missing data checksums after a ranged fsync (msync)
  btrfs: fix pinned underflow after transaction aborted
  Btrfs: fix cur_offset in the error case for nocow
2 parents c140f8b + d6fd0ae commit 63a42e1

File tree

8 files changed

+107
-57
lines changed

8 files changed

+107
-57
lines changed

fs/btrfs/ctree.h

+3
Original file line numberDiff line numberDiff line change
@@ -3163,6 +3163,9 @@ void btrfs_destroy_inode(struct inode *inode);
31633163
int btrfs_drop_inode(struct inode *inode);
31643164
int __init btrfs_init_cachep(void);
31653165
void __cold btrfs_destroy_cachep(void);
3166+
struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
3167+
struct btrfs_root *root, int *new,
3168+
struct btrfs_path *path);
31663169
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
31673170
struct btrfs_root *root, int *was_new);
31683171
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,

fs/btrfs/disk-io.c

+26-37
Original file line numberDiff line numberDiff line change
@@ -1664,9 +1664,8 @@ static int cleaner_kthread(void *arg)
16641664
struct btrfs_root *root = arg;
16651665
struct btrfs_fs_info *fs_info = root->fs_info;
16661666
int again;
1667-
struct btrfs_trans_handle *trans;
16681667

1669-
do {
1668+
while (1) {
16701669
again = 0;
16711670

16721671
/* Make the cleaner go to sleep early. */
@@ -1715,42 +1714,16 @@ static int cleaner_kthread(void *arg)
17151714
*/
17161715
btrfs_delete_unused_bgs(fs_info);
17171716
sleep:
1717+
if (kthread_should_park())
1718+
kthread_parkme();
1719+
if (kthread_should_stop())
1720+
return 0;
17181721
if (!again) {
17191722
set_current_state(TASK_INTERRUPTIBLE);
1720-
if (!kthread_should_stop())
1721-
schedule();
1723+
schedule();
17221724
__set_current_state(TASK_RUNNING);
17231725
}
1724-
} while (!kthread_should_stop());
1725-
1726-
/*
1727-
* Transaction kthread is stopped before us and wakes us up.
1728-
* However we might have started a new transaction and COWed some
1729-
* tree blocks when deleting unused block groups for example. So
1730-
* make sure we commit the transaction we started to have a clean
1731-
* shutdown when evicting the btree inode - if it has dirty pages
1732-
* when we do the final iput() on it, eviction will trigger a
1733-
* writeback for it which will fail with null pointer dereferences
1734-
* since work queues and other resources were already released and
1735-
* destroyed by the time the iput/eviction/writeback is made.
1736-
*/
1737-
trans = btrfs_attach_transaction(root);
1738-
if (IS_ERR(trans)) {
1739-
if (PTR_ERR(trans) != -ENOENT)
1740-
btrfs_err(fs_info,
1741-
"cleaner transaction attach returned %ld",
1742-
PTR_ERR(trans));
1743-
} else {
1744-
int ret;
1745-
1746-
ret = btrfs_commit_transaction(trans);
1747-
if (ret)
1748-
btrfs_err(fs_info,
1749-
"cleaner open transaction commit returned %d",
1750-
ret);
17511726
}
1752-
1753-
return 0;
17541727
}
17551728

17561729
static int transaction_kthread(void *arg)
@@ -3931,6 +3904,13 @@ void close_ctree(struct btrfs_fs_info *fs_info)
39313904
int ret;
39323905

39333906
set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
3907+
/*
3908+
* We don't want the cleaner to start new transactions, add more delayed
3909+
* iputs, etc. while we're closing. We can't use kthread_stop() yet
3910+
* because that frees the task_struct, and the transaction kthread might
3911+
* still try to wake up the cleaner.
3912+
*/
3913+
kthread_park(fs_info->cleaner_kthread);
39343914

39353915
/* wait for the qgroup rescan worker to stop */
39363916
btrfs_qgroup_wait_for_completion(fs_info, false);
@@ -3958,9 +3938,8 @@ void close_ctree(struct btrfs_fs_info *fs_info)
39583938

39593939
if (!sb_rdonly(fs_info->sb)) {
39603940
/*
3961-
* If the cleaner thread is stopped and there are
3962-
* block groups queued for removal, the deletion will be
3963-
* skipped when we quit the cleaner thread.
3941+
* The cleaner kthread is stopped, so do one final pass over
3942+
* unused block groups.
39643943
*/
39653944
btrfs_delete_unused_bgs(fs_info);
39663945

@@ -4359,13 +4338,23 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
43594338
unpin = pinned_extents;
43604339
again:
43614340
while (1) {
4341+
/*
4342+
* The btrfs_finish_extent_commit() may get the same range as
4343+
* ours between find_first_extent_bit and clear_extent_dirty.
4344+
* Hence, hold the unused_bg_unpin_mutex to avoid unpinning
4345+
* the same extent range twice.
4346+
*/
4347+
mutex_lock(&fs_info->unused_bg_unpin_mutex);
43624348
ret = find_first_extent_bit(unpin, 0, &start, &end,
43634349
EXTENT_DIRTY, NULL);
4364-
if (ret)
4350+
if (ret) {
4351+
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
43654352
break;
4353+
}
43664354

43674355
clear_extent_dirty(unpin, start, end);
43684356
btrfs_error_unpin_extent_range(fs_info, start, end);
4357+
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
43694358
cond_resched();
43704359
}
43714360

fs/btrfs/free-space-cache.c

+21-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
7575
* sure NOFS is set to keep us from deadlocking.
7676
*/
7777
nofs_flag = memalloc_nofs_save();
78-
inode = btrfs_iget(fs_info->sb, &location, root, NULL);
78+
inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
79+
btrfs_release_path(path);
7980
memalloc_nofs_restore(nofs_flag);
8081
if (IS_ERR(inode))
8182
return inode;
@@ -838,6 +839,25 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
838839
path->search_commit_root = 1;
839840
path->skip_locking = 1;
840841

842+
/*
843+
* We must pass a path with search_commit_root set to btrfs_iget in
844+
* order to avoid a deadlock when allocating extents for the tree root.
845+
*
846+
* When we are COWing an extent buffer from the tree root, when looking
847+
* for a free extent, at extent-tree.c:find_free_extent(), we can find
848+
* a block group without its free space cache loaded. When we find one
849+
* we must load its space cache which requires reading its free space
850+
* cache's inode item from the root tree. If this inode item is located
851+
* in the same leaf that we started COWing before, then we end up in
852+
* a deadlock on the extent buffer (trying to read lock it when we
853+
* previously write locked it).
854+
*
855+
* It's safe to read the inode item using the commit root because
856+
* block groups, once loaded, stay in memory forever (until they are
857+
* removed) as well as their space caches once loaded. New block groups
858+
* once created get their ->cached field set to BTRFS_CACHE_FINISHED so
859+
* we will never try to read their inode item while the fs is mounted.
860+
*/
841861
inode = lookup_free_space_inode(fs_info, block_group, path);
842862
if (IS_ERR(inode)) {
843863
btrfs_free_path(path);

fs/btrfs/inode.c

+24-13
Original file line numberDiff line numberDiff line change
@@ -1531,12 +1531,11 @@ static noinline int run_delalloc_nocow(struct inode *inode,
15311531
}
15321532
btrfs_release_path(path);
15331533

1534-
if (cur_offset <= end && cow_start == (u64)-1) {
1534+
if (cur_offset <= end && cow_start == (u64)-1)
15351535
cow_start = cur_offset;
1536-
cur_offset = end;
1537-
}
15381536

15391537
if (cow_start != (u64)-1) {
1538+
cur_offset = end;
15401539
ret = cow_file_range(inode, locked_page, cow_start, end, end,
15411540
page_started, nr_written, 1, NULL);
15421541
if (ret)
@@ -3570,10 +3569,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
35703569
/*
35713570
* read an inode from the btree into the in-memory inode
35723571
*/
3573-
static int btrfs_read_locked_inode(struct inode *inode)
3572+
static int btrfs_read_locked_inode(struct inode *inode,
3573+
struct btrfs_path *in_path)
35743574
{
35753575
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3576-
struct btrfs_path *path;
3576+
struct btrfs_path *path = in_path;
35773577
struct extent_buffer *leaf;
35783578
struct btrfs_inode_item *inode_item;
35793579
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3589,15 +3589,18 @@ static int btrfs_read_locked_inode(struct inode *inode)
35893589
if (!ret)
35903590
filled = true;
35913591

3592-
path = btrfs_alloc_path();
3593-
if (!path)
3594-
return -ENOMEM;
3592+
if (!path) {
3593+
path = btrfs_alloc_path();
3594+
if (!path)
3595+
return -ENOMEM;
3596+
}
35953597

35963598
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
35973599

35983600
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
35993601
if (ret) {
3600-
btrfs_free_path(path);
3602+
if (path != in_path)
3603+
btrfs_free_path(path);
36013604
return ret;
36023605
}
36033606

@@ -3722,7 +3725,8 @@ static int btrfs_read_locked_inode(struct inode *inode)
37223725
btrfs_ino(BTRFS_I(inode)),
37233726
root->root_key.objectid, ret);
37243727
}
3725-
btrfs_free_path(path);
3728+
if (path != in_path)
3729+
btrfs_free_path(path);
37263730

37273731
if (!maybe_acls)
37283732
cache_no_acl(inode);
@@ -5644,8 +5648,9 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
56445648
/* Get an inode object given its location and corresponding root.
56455649
* Returns in *is_new if the inode was read from disk
56465650
*/
5647-
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5648-
struct btrfs_root *root, int *new)
5651+
struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
5652+
struct btrfs_root *root, int *new,
5653+
struct btrfs_path *path)
56495654
{
56505655
struct inode *inode;
56515656

@@ -5656,7 +5661,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
56565661
if (inode->i_state & I_NEW) {
56575662
int ret;
56585663

5659-
ret = btrfs_read_locked_inode(inode);
5664+
ret = btrfs_read_locked_inode(inode, path);
56605665
if (!ret) {
56615666
inode_tree_add(inode);
56625667
unlock_new_inode(inode);
@@ -5678,6 +5683,12 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
56785683
return inode;
56795684
}
56805685

5686+
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5687+
struct btrfs_root *root, int *new)
5688+
{
5689+
return btrfs_iget_path(s, location, root, new, NULL);
5690+
}
5691+
56815692
static struct inode *new_simple_dir(struct super_block *s,
56825693
struct btrfs_key *key,
56835694
struct btrfs_root *root)

fs/btrfs/ioctl.c

+12-2
Original file line numberDiff line numberDiff line change
@@ -3488,6 +3488,8 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
34883488
const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize;
34893489

34903490
len = round_down(i_size_read(src), sz) - loff;
3491+
if (len == 0)
3492+
return 0;
34913493
olen = len;
34923494
}
34933495
}
@@ -4257,9 +4259,17 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
42574259
goto out_unlock;
42584260
if (len == 0)
42594261
olen = len = src->i_size - off;
4260-
/* if we extend to eof, continue to block boundary */
4261-
if (off + len == src->i_size)
4262+
/*
4263+
* If we extend to eof, continue to block boundary if and only if the
4264+
* destination end offset matches the destination file's size, otherwise
4265+
* we would be corrupting data by placing the eof block into the middle
4266+
* of a file.
4267+
*/
4268+
if (off + len == src->i_size) {
4269+
if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size)
4270+
goto out_unlock;
42624271
len = ALIGN(src->i_size, bs) - off;
4272+
}
42634273

42644274
if (len == 0) {
42654275
ret = 0;

fs/btrfs/super.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -1916,7 +1916,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
19161916
}
19171917

19181918
/* Used to sort the devices by max_avail(descending sort) */
1919-
static int btrfs_cmp_device_free_bytes(const void *dev_info1,
1919+
static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
19201920
const void *dev_info2)
19211921
{
19221922
if (((struct btrfs_device_info *)dev_info1)->max_avail >
@@ -1945,8 +1945,8 @@ static inline void btrfs_descending_sort_devices(
19451945
* The helper to calc the free space on the devices that can be used to store
19461946
* file data.
19471947
*/
1948-
static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
1949-
u64 *free_bytes)
1948+
static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
1949+
u64 *free_bytes)
19501950
{
19511951
struct btrfs_device_info *devices_info;
19521952
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;

fs/btrfs/tree-checker.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
440440
type != (BTRFS_BLOCK_GROUP_METADATA |
441441
BTRFS_BLOCK_GROUP_DATA)) {
442442
block_group_err(fs_info, leaf, slot,
443-
"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx",
443+
"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
444444
type, hweight64(type),
445445
BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
446446
BTRFS_BLOCK_GROUP_SYSTEM,

fs/btrfs/tree-log.c

+17
Original file line numberDiff line numberDiff line change
@@ -4396,6 +4396,23 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
43964396
logged_end = end;
43974397

43984398
list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
4399+
/*
4400+
* Skip extents outside our logging range. It's important to do
4401+
* it for correctness because if we don't ignore them, we may
4402+
* log them before their ordered extent completes, and therefore
4403+
* we could log them without logging their respective checksums
4404+
* (the checksum items are added to the csum tree at the very
4405+
* end of btrfs_finish_ordered_io()). Also leave such extents
4406+
* outside of our range in the list, since we may have another
4407+
* ranged fsync in the near future that needs them. If an extent
4408+
* outside our range corresponds to a hole, log it to avoid
4409+
* leaving gaps between extents (fsck will complain when we are
4410+
* not using the NO_HOLES feature).
4411+
*/
4412+
if ((em->start > end || em->start + em->len <= start) &&
4413+
em->block_start != EXTENT_MAP_HOLE)
4414+
continue;
4415+
43994416
list_del_init(&em->list);
44004417
/*
44014418
* Just an arbitrary number, this can be really CPU intensive

0 commit comments

Comments
 (0)