mirror of
https://github.com/flatcar/scripts.git
synced 2025-08-15 08:56:58 +02:00
Merge pull request #2315 from flatcar/bug-847-kernel-fix-backport
sys-kernel/coreos-sources: Add backport of bugfix for #847
This commit is contained in:
commit
1db12d110d
1
sdk_container/src/third_party/coreos-overlay/changelog/bugfixes/2022-12-06-kernel-bug-847.md
vendored
Normal file
1
sdk_container/src/third_party/coreos-overlay/changelog/bugfixes/2022-12-06-kernel-bug-847.md
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
- Fix "ext4 deadlock under heavy I/O load" kernel issue. The patch for this is included provisionally while we wait for it to be merged upstream ([Flatcar#847](https://github.com/flatcar/Flatcar/issues/847), [coreos-overlay#2315](https://github.com/flatcar/coreos-overlay/pull/2315))
|
@ -42,4 +42,5 @@ UNIPATCH_LIST="
|
|||||||
${PATCH_DIR}/z0005-Drivers-hv-vmbus-Propagate-VMbus-coherence-to-each-V.patch \
|
${PATCH_DIR}/z0005-Drivers-hv-vmbus-Propagate-VMbus-coherence-to-each-V.patch \
|
||||||
${PATCH_DIR}/z0006-PCI-hv-Avoid-the-retarget-interrupt-hypercall-in-irq.patch \
|
${PATCH_DIR}/z0006-PCI-hv-Avoid-the-retarget-interrupt-hypercall-in-irq.patch \
|
||||||
${PATCH_DIR}/z0007-PCI-hv-Remove-unused-hv_set_msi_entry_from_desc.patch \
|
${PATCH_DIR}/z0007-PCI-hv-Remove-unused-hv_set_msi_entry_from_desc.patch \
|
||||||
|
${PATCH_DIR}/z0008-ext4-Fix-deadlock-due-to-mbcache-en.patch \
|
||||||
"
|
"
|
||||||
|
@ -0,0 +1,129 @@
|
|||||||
|
From e7ec42e181c6213d1fd71b946196f05af601ba5c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jan Kara <jack@suse.cz>
|
||||||
|
Date: Mon, 21 Nov 2022 15:44:10 +0100
|
||||||
|
Subject: [PATCH] ext4: Fix deadlock due to mbcache entry corruption
|
||||||
|
|
||||||
|
When manipulating xattr blocks, we can deadlock infinitely looping
|
||||||
|
inside ext4_xattr_block_set() where we constantly keep finding xattr
|
||||||
|
block for reuse in mbcache but we are unable to reuse it because its
|
||||||
|
reference count is too big. This happens because cache entry for the
|
||||||
|
xattr block is marked as reusable (e_reusable set) although its
|
||||||
|
reference count is too big. When this inconsistency happens, this
|
||||||
|
inconsistent state is kept indefinitely and so ext4_xattr_block_set()
|
||||||
|
keeps retrying indefinitely.
|
||||||
|
|
||||||
|
The inconsistent state is caused by non-atomic update of e_reusable bit.
|
||||||
|
e_reusable is part of a bitfield and e_reusable update can race with
|
||||||
|
update of e_referenced bit in the same bitfield resulting in loss of one
|
||||||
|
of the updates. Fix the problem by using atomic bitops instead.
|
||||||
|
|
||||||
|
[jeremi: backport from here https://lore.kernel.org/linux-ext4/20221122174807.GA9658@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net/]
|
||||||
|
CC: stable@vger.kernel.org
|
||||||
|
Fixes: 6048c64b2609 ("mbcache: add reusable flag to cache entries")
|
||||||
|
Reported-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
|
||||||
|
Reported-by: Thilo Fromm <t-lo@linux.microsoft.com>
|
||||||
|
Signed-off-by: Jan Kara <jack@suse.cz>
|
||||||
|
---
|
||||||
|
fs/ext4/xattr.c | 4 ++--
|
||||||
|
fs/mbcache.c | 14 ++++++++------
|
||||||
|
include/linux/mbcache.h | 9 +++++++--
|
||||||
|
3 files changed, 17 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
|
||||||
|
index 533216e80fa2..22700812a4d3 100644
|
||||||
|
--- a/fs/ext4/xattr.c
|
||||||
|
+++ b/fs/ext4/xattr.c
|
||||||
|
@@ -1281,7 +1281,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
|
||||||
|
ce = mb_cache_entry_get(ea_block_cache, hash,
|
||||||
|
bh->b_blocknr);
|
||||||
|
if (ce) {
|
||||||
|
- ce->e_reusable = 1;
|
||||||
|
+ set_bit(MBE_REUSABLE_B, &ce->e_flags);
|
||||||
|
mb_cache_entry_put(ea_block_cache, ce);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -2042,7 +2042,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
|
||||||
|
}
|
||||||
|
BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
|
||||||
|
if (ref == EXT4_XATTR_REFCOUNT_MAX)
|
||||||
|
- ce->e_reusable = 0;
|
||||||
|
+ clear_bit(MBE_REUSABLE_B, &ce->e_flags);
|
||||||
|
ea_bdebug(new_bh, "reusing; refcount now=%d",
|
||||||
|
ref);
|
||||||
|
ext4_xattr_block_csum_set(inode, new_bh);
|
||||||
|
diff --git a/fs/mbcache.c b/fs/mbcache.c
|
||||||
|
index 2010bc80a3f2..ac07b50ea3df 100644
|
||||||
|
--- a/fs/mbcache.c
|
||||||
|
+++ b/fs/mbcache.c
|
||||||
|
@@ -94,8 +94,9 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
|
||||||
|
atomic_set(&entry->e_refcnt, 1);
|
||||||
|
entry->e_key = key;
|
||||||
|
entry->e_value = value;
|
||||||
|
- entry->e_reusable = reusable;
|
||||||
|
- entry->e_referenced = 0;
|
||||||
|
+ entry->e_flags = 0;
|
||||||
|
+ if (reusable)
|
||||||
|
+ set_bit(MBE_REUSABLE_B, &entry->e_flags);
|
||||||
|
head = mb_cache_entry_head(cache, key);
|
||||||
|
hlist_bl_lock(head);
|
||||||
|
hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
|
||||||
|
@@ -155,7 +156,8 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
|
||||||
|
while (node) {
|
||||||
|
entry = hlist_bl_entry(node, struct mb_cache_entry,
|
||||||
|
e_hash_list);
|
||||||
|
- if (entry->e_key == key && entry->e_reusable) {
|
||||||
|
+ if (entry->e_key == key &&
|
||||||
|
+ test_bit(MBE_REUSABLE_B, &entry->e_flags)) {
|
||||||
|
atomic_inc(&entry->e_refcnt);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
@@ -325,7 +327,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
|
||||||
|
void mb_cache_entry_touch(struct mb_cache *cache,
|
||||||
|
struct mb_cache_entry *entry)
|
||||||
|
{
|
||||||
|
- entry->e_referenced = 1;
|
||||||
|
+ set_bit(MBE_REFERENCED_B, &entry->e_flags);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(mb_cache_entry_touch);
|
||||||
|
|
||||||
|
@@ -350,8 +352,8 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
|
||||||
|
while (nr_to_scan-- && !list_empty(&cache->c_list)) {
|
||||||
|
entry = list_first_entry(&cache->c_list,
|
||||||
|
struct mb_cache_entry, e_list);
|
||||||
|
- if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) {
|
||||||
|
- entry->e_referenced = 0;
|
||||||
|
+ if (test_bit(MBE_REFERENCED_B, &entry->e_flags) || atomic_read(&entry->e_refcnt) > 2) {
|
||||||
|
+ clear_bit(MBE_REFERENCED_B, &entry->e_flags);
|
||||||
|
list_move_tail(&entry->e_list, &cache->c_list);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
|
||||||
|
index 8eca7f25c432..62927f7e2588 100644
|
||||||
|
--- a/include/linux/mbcache.h
|
||||||
|
+++ b/include/linux/mbcache.h
|
||||||
|
@@ -10,6 +10,12 @@
|
||||||
|
|
||||||
|
struct mb_cache;
|
||||||
|
|
||||||
|
+/* Cache entry flags */
|
||||||
|
+enum {
|
||||||
|
+ MBE_REFERENCED_B = 0,
|
||||||
|
+ MBE_REUSABLE_B
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
struct mb_cache_entry {
|
||||||
|
/* List of entries in cache - protected by cache->c_list_lock */
|
||||||
|
struct list_head e_list;
|
||||||
|
@@ -18,8 +24,7 @@ struct mb_cache_entry {
|
||||||
|
atomic_t e_refcnt;
|
||||||
|
/* Key in hash - stable during lifetime of the entry */
|
||||||
|
u32 e_key;
|
||||||
|
- u32 e_referenced:1;
|
||||||
|
- u32 e_reusable:1;
|
||||||
|
+ unsigned long e_flags;
|
||||||
|
/* User provided value - stable during lifetime of the entry */
|
||||||
|
u64 e_value;
|
||||||
|
};
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
Loading…
Reference in New Issue
Block a user