sys-kernel/coreos-*: fix EREMOTEIO on WRITE SAME errors

This was causing resize2fs failures and delayed block allocation errors
on Oracle OCI.
This commit is contained in:
Benjamin Gilbert 2017-12-05 23:48:50 -08:00
parent 76802b2e8f
commit e6f1b12ae0
9 changed files with 229 additions and 10 deletions

View File

@ -2,7 +2,7 @@
# Distributed under the terms of the GNU General Public License v2 # Distributed under the terms of the GNU General Public License v2
EAPI=5 EAPI=5
COREOS_SOURCE_REVISION="" COREOS_SOURCE_REVISION="-r1"
inherit coreos-kernel inherit coreos-kernel
DESCRIPTION="CoreOS Linux kernel" DESCRIPTION="CoreOS Linux kernel"

View File

@ -2,7 +2,7 @@
# Distributed under the terms of the GNU General Public License v2 # Distributed under the terms of the GNU General Public License v2
EAPI=5 EAPI=5
COREOS_SOURCE_REVISION="" COREOS_SOURCE_REVISION="-r1"
inherit coreos-kernel savedconfig inherit coreos-kernel savedconfig
DESCRIPTION="CoreOS Linux kernel modules" DESCRIPTION="CoreOS Linux kernel modules"

View File

@ -35,4 +35,6 @@ UNIPATCH_LIST="
${PATCH_DIR}/z0002-Add-arm64-coreos-verity-hash.patch \ ${PATCH_DIR}/z0002-Add-arm64-coreos-verity-hash.patch \
${PATCH_DIR}/z0003-KVM-Remove-I-O-port-0x80-bypass-on-intel-hosts.patch \ ${PATCH_DIR}/z0003-KVM-Remove-I-O-port-0x80-bypass-on-intel-hosts.patch \
${PATCH_DIR}/z0004-dccp-CVE-2017-8824-use-after-free-in-DCCP-code.patch \ ${PATCH_DIR}/z0004-dccp-CVE-2017-8824-use-after-free-in-DCCP-code.patch \
${PATCH_DIR}/z0005-block-factor-out-__blkdev_issue_zero_pages.patch \
${PATCH_DIR}/z0006-block-cope-with-WRITE-ZEROES-failing-in-blkdev_issue.patch \
" "

View File

@ -1,7 +1,7 @@
From 51a1127fb1ac44395f477a19b7e866ca68f19d0c Mon Sep 17 00:00:00 2001 From 51a1127fb1ac44395f477a19b7e866ca68f19d0c Mon Sep 17 00:00:00 2001
From: Vito Caputo <vito.caputo@coreos.com> From: Vito Caputo <vito.caputo@coreos.com>
Date: Wed, 25 Nov 2015 02:59:45 -0800 Date: Wed, 25 Nov 2015 02:59:45 -0800
Subject: [PATCH 1/4] kbuild: derive relative path for KBUILD_SRC from CURDIR Subject: [PATCH 1/6] kbuild: derive relative path for KBUILD_SRC from CURDIR
This enables relocating source and build trees to different roots, This enables relocating source and build trees to different roots,
provided they stay reachable relative to one another. Useful for provided they stay reachable relative to one another. Useful for
@ -26,5 +26,5 @@ index ba1648c093fe..805a34dab5bd 100644
# Leave processing to above invocation of make # Leave processing to above invocation of make
-- --
2.14.1 2.13.6

View File

@ -1,7 +1,7 @@
From 6f18813f11d2ebbb8a083c4af1f65f71e2457ca6 Mon Sep 17 00:00:00 2001 From 6f18813f11d2ebbb8a083c4af1f65f71e2457ca6 Mon Sep 17 00:00:00 2001
From: Geoff Levand <geoff@infradead.org> From: Geoff Levand <geoff@infradead.org>
Date: Fri, 11 Nov 2016 17:28:52 -0800 Date: Fri, 11 Nov 2016 17:28:52 -0800
Subject: [PATCH 2/4] Add arm64 coreos verity hash Subject: [PATCH 2/6] Add arm64 coreos verity hash
Signed-off-by: Geoff Levand <geoff@infradead.org> Signed-off-by: Geoff Levand <geoff@infradead.org>
--- ---
@ -25,5 +25,5 @@ index 613fc3000677..fdaf86c78332 100644
/* /*
* The debug table is referenced via its Relative Virtual Address (RVA), * The debug table is referenced via its Relative Virtual Address (RVA),
-- --
2.14.1 2.13.6

View File

@ -1,7 +1,7 @@
From 8fabd9ceb90c5d6fa66b57477bbc791f4e37730f Mon Sep 17 00:00:00 2001 From 8fabd9ceb90c5d6fa66b57477bbc791f4e37730f Mon Sep 17 00:00:00 2001
From: Andrew Honig <ahonig@google.com> From: Andrew Honig <ahonig@google.com>
Date: Wed, 29 Nov 2017 10:54:24 -0800 Date: Wed, 29 Nov 2017 10:54:24 -0800
Subject: [PATCH 3/4] KVM: Remove I/O port 0x80 bypass on intel hosts. Subject: [PATCH 3/6] KVM: Remove I/O port 0x80 bypass on intel hosts.
KVM allows guests to directly access I/O port 0x80 on intel hosts. If KVM allows guests to directly access I/O port 0x80 on intel hosts. If
the guest floods this port with writes it generates exceptions and the guest floods this port with writes it generates exceptions and
@ -77,5 +77,5 @@ index b21113bcf227..7242184fd8fd 100644
memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-- --
2.14.1 2.13.6

View File

@ -1,7 +1,7 @@
From 088cbc310cfcf3324ae5f4e092d527707cdb4ce0 Mon Sep 17 00:00:00 2001 From 088cbc310cfcf3324ae5f4e092d527707cdb4ce0 Mon Sep 17 00:00:00 2001
From: Mohamed Ghannam <simo.ghannam@gmail.com> From: Mohamed Ghannam <simo.ghannam@gmail.com>
Date: Tue, 5 Dec 2017 12:23:04 -0800 Date: Tue, 5 Dec 2017 12:23:04 -0800
Subject: [PATCH 4/4] dccp: CVE-2017-8824: use-after-free in DCCP code Subject: [PATCH 4/6] dccp: CVE-2017-8824: use-after-free in DCCP code
Whenever the sock object is in DCCP_CLOSED state, dccp_disconnect() Whenever the sock object is in DCCP_CLOSED state, dccp_disconnect()
must free dccps_hc_tx_ccid and dccps_hc_rx_ccid and set to NULL. must free dccps_hc_tx_ccid and dccps_hc_rx_ccid and set to NULL.
@ -36,5 +36,5 @@ index b68168fcc06a..9d43c1f40274 100644
__skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_receive_queue);
__skb_queue_purge(&sk->sk_write_queue); __skb_queue_purge(&sk->sk_write_queue);
-- --
2.14.1 2.13.6

View File

@ -0,0 +1,107 @@
From a21a8b276ac889e817ea9d36cd36e9f6de242c54 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 16 Oct 2017 15:59:09 +0200
Subject: [PATCH 5/6] block: factor out __blkdev_issue_zero_pages()
blkdev_issue_zeroout() will use this in !BLKDEV_ZERO_NOFALLBACK case.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
block/blk-lib.c | 63 +++++++++++++++++++++++++++++++++------------------------
1 file changed, 37 insertions(+), 26 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 63fb971d6574..9e86a4871b0f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -275,6 +275,40 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
return min(pages, (sector_t)BIO_MAX_PAGES);
}
+static int __blkdev_issue_zero_pages(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+ struct bio **biop)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ struct bio *bio = *biop;
+ int bi_size = 0;
+ unsigned int sz;
+
+ if (!q)
+ return -ENXIO;
+
+ while (nr_sects != 0) {
+ bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
+ gfp_mask);
+ bio->bi_iter.bi_sector = sector;
+ bio_set_dev(bio, bdev);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+
+ while (nr_sects != 0) {
+ sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
+ bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
+ nr_sects -= bi_size >> 9;
+ sector += bi_size >> 9;
+ if (bi_size < sz)
+ break;
+ }
+ cond_resched();
+ }
+
+ *biop = bio;
+ return 0;
+}
+
/**
* __blkdev_issue_zeroout - generate number of zero filed write bios
* @bdev: blockdev to issue
@@ -305,9 +339,6 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
unsigned flags)
{
int ret;
- int bi_size = 0;
- struct bio *bio = *biop;
- unsigned int sz;
sector_t bs_mask;
bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
@@ -317,30 +348,10 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
biop, flags);
if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
- goto out;
-
- ret = 0;
- while (nr_sects != 0) {
- bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
- gfp_mask);
- bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, bdev);
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-
- while (nr_sects != 0) {
- sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
- bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
- nr_sects -= bi_size >> 9;
- sector += bi_size >> 9;
- if (bi_size < sz)
- break;
- }
- cond_resched();
- }
+ return ret;
- *biop = bio;
-out:
- return ret;
+ return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
+ biop);
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);
--
2.13.6

View File

@ -0,0 +1,110 @@
From 0407ac43e843fb913125301326aecbdd53f99a93 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 16 Oct 2017 15:59:10 +0200
Subject: [PATCH 6/6] block: cope with WRITE ZEROES failing in
blkdev_issue_zeroout()
sd_config_write_same() ignores ->max_ws_blocks == 0 and resets it to
permit trying WRITE SAME on older SCSI devices, unless ->no_write_same
is set. Because REQ_OP_WRITE_ZEROES is implemented in terms of WRITE
SAME, blkdev_issue_zeroout() may fail with -EREMOTEIO:
$ fallocate -zn -l 1k /dev/sdg
fallocate: fallocate failed: Remote I/O error
$ fallocate -zn -l 1k /dev/sdg # OK
$ fallocate -zn -l 1k /dev/sdg # OK
The following calls succeed because sd_done() sets ->no_write_same in
response to a sense that would become BLK_STS_TARGET/-EREMOTEIO, causing
__blkdev_issue_zeroout() to fall back to generating ZERO_PAGE bios.
This means blkdev_issue_zeroout() must cope with WRITE ZEROES failing
and fall back to manually zeroing, unless BLKDEV_ZERO_NOFALLBACK is
specified. For BLKDEV_ZERO_NOFALLBACK case, return -EOPNOTSUPP if
sd_done() has just set ->no_write_same thus indicating lack of offload
support.
Fixes: c20cfc27a473 ("block: stop using blkdev_issue_write_same for zeroing")
Cc: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
block/blk-lib.c | 45 +++++++++++++++++++++++++++++++++++----------
1 file changed, 35 insertions(+), 10 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9e86a4871b0f..2bc544ce3d2e 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -322,12 +322,6 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev,
* Zero-fill a block range, either using hardware offload or by explicitly
* writing zeroes to the device.
*
- * Note that this function may fail with -EOPNOTSUPP if the driver signals
- * zeroing offload support, but the device fails to process the command (for
- * some devices there is no non-destructive way to verify whether this
- * operation is actually supported). In this case the caller should call
- * retry the call to blkdev_issue_zeroout() and the fallback path will be used.
- *
* If a device is using logical block provisioning, the underlying space will
* not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
*
@@ -371,18 +365,49 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout);
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
- int ret;
- struct bio *bio = NULL;
+ int ret = 0;
+ sector_t bs_mask;
+ struct bio *bio;
struct blk_plug plug;
+ bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);
+ bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+ if ((sector | nr_sects) & bs_mask)
+ return -EINVAL;
+
+retry:
+ bio = NULL;
blk_start_plug(&plug);
- ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
- &bio, flags);
+ if (try_write_zeroes) {
+ ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+ gfp_mask, &bio, flags);
+ } else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
+ ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
+ gfp_mask, &bio);
+ } else {
+ /* No zeroing offload support */
+ ret = -EOPNOTSUPP;
+ }
if (ret == 0 && bio) {
ret = submit_bio_wait(bio);
bio_put(bio);
}
blk_finish_plug(&plug);
+ if (ret && try_write_zeroes) {
+ if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
+ try_write_zeroes = false;
+ goto retry;
+ }
+ if (!bdev_write_zeroes_sectors(bdev)) {
+ /*
+ * Zeroing offload support was indicated, but the
+ * device reported ILLEGAL REQUEST (for some devices
+ * there is no non-destructive way to verify whether
+ * WRITE ZEROES is actually supported).
+ */
+ ret = -EOPNOTSUPP;
+ }
+ }
return ret;
}
--
2.13.6