Newer
Older
// SPDX-License-Identifier: GPL-2.0
* Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
*/
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/sched/sysctl.h>
#include <linux/blk-crypto.h>
#include <trace/events/block.h>
static struct biovec_slab {
int nr_vecs;
char *name;
struct kmem_cache *slab;
} bvec_slabs[] __read_mostly = {
{ .nr_vecs = 16, .name = "biovec-16" },
{ .nr_vecs = 64, .name = "biovec-64" },
{ .nr_vecs = 128, .name = "biovec-128" },
{ .nr_vecs = BIO_MAX_VECS, .name = "biovec-max" },
static struct biovec_slab *biovec_slab(unsigned short nr_vecs)
{
switch (nr_vecs) {
/* smaller bios use inline vecs */
case 5 ... 16:
return &bvec_slabs[0];
case 17 ... 64:
return &bvec_slabs[1];
case 65 ... 128:
return &bvec_slabs[2];
case 129 ... BIO_MAX_VECS:
return &bvec_slabs[3];
default:
BUG();
return NULL;
}
}
/*
* fs_bio_set is the bio_set containing bio and iovec memory pools used by
* IO code that does not need private memory pools.
*/
EXPORT_SYMBOL(fs_bio_set);
/*
* Our slab pool management
*/
struct bio_slab {
struct kmem_cache *slab;
unsigned int slab_ref;
unsigned int slab_size;
char name[8];
};
static DEFINE_MUTEX(bio_slab_lock);
static struct bio_slab *create_bio_slab(unsigned int size)
struct bio_slab *bslab = kzalloc(sizeof(*bslab), GFP_KERNEL);
snprintf(bslab->name, sizeof(bslab->name), "bio-%d", size);
bslab->slab = kmem_cache_create(bslab->name, size,
ARCH_KMALLOC_MINALIGN, SLAB_HWCACHE_ALIGN, NULL);
if (!bslab->slab)
goto fail_alloc_slab;
bslab->slab_ref = 1;
bslab->slab_size = size;
if (!xa_err(xa_store(&bio_slabs, size, bslab, GFP_KERNEL)))
return bslab;
fail_alloc_slab:
kfree(bslab);
return NULL;
}
static inline unsigned int bs_bio_slab_size(struct bio_set *bs)
{
return bs->front_pad + sizeof(struct bio) + bs->back_pad;
static struct kmem_cache *bio_find_or_create_slab(struct bio_set *bs)
{
unsigned int size = bs_bio_slab_size(bs);
struct bio_slab *bslab;
mutex_lock(&bio_slab_lock);
bslab = xa_load(&bio_slabs, size);
if (bslab)
bslab->slab_ref++;
else
bslab = create_bio_slab(size);
if (bslab)
return bslab->slab;
return NULL;
}
static void bio_put_slab(struct bio_set *bs)
{
struct bio_slab *bslab = NULL;
unsigned int slab_size = bs_bio_slab_size(bs);
mutex_lock(&bio_slab_lock);
bslab = xa_load(&bio_slabs, slab_size);
if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
goto out;
WARN_ON_ONCE(bslab->slab != bs->bio_slab);
WARN_ON(!bslab->slab_ref);
if (--bslab->slab_ref)
goto out;
kmem_cache_destroy(bslab->slab);
out:
mutex_unlock(&bio_slab_lock);
}
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs)
BIO_BUG_ON(nr_vecs > BIO_MAX_VECS);
if (nr_vecs == BIO_MAX_VECS)
else if (nr_vecs > BIO_INLINE_VECS)
kmem_cache_free(biovec_slab(nr_vecs)->slab, bv);
/*
* Make the first allocation restricted and don't dump info on allocation
* failures, since we'll fall back to the mempool in case of failure.
*/
static inline gfp_t bvec_alloc_gfp(gfp_t gfp)
{
return (gfp & ~(__GFP_DIRECT_RECLAIM | __GFP_IO)) |
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
gfp_t gfp_mask)
struct biovec_slab *bvs = biovec_slab(*nr_vecs);
if (WARN_ON_ONCE(!bvs))
return NULL;
/*
* Upgrade the nr_vecs request to take full advantage of the allocation.
* We also rely on this in the bvec_free path.
*nr_vecs = bvs->nr_vecs;
* Try a slab allocation first for all smaller allocations. If that
* fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
* The mempool is sized to handle up to BIO_MAX_VECS entries.
if (*nr_vecs < BIO_MAX_VECS) {
bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
*nr_vecs = BIO_MAX_VECS;
void bio_uninit(struct bio *bio)
#ifdef CONFIG_BLK_CGROUP
if (bio->bi_blkg) {
blkg_put(bio->bi_blkg);
bio->bi_blkg = NULL;
}
#endif
if (bio_integrity(bio))
bio_integrity_free(bio);
bio_crypt_free_ctx(bio);
EXPORT_SYMBOL(bio_uninit);
static void bio_free(struct bio *bio)
{
struct bio_set *bs = bio->bi_pool;
void *p;
bio_uninit(bio);
bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
/*
* If we have front padding, adjust the bio pointer before freeing
*/
p = bio;
mempool_free(p, &bs->bio_pool);
} else {
/* Bio was allocated by bio_kmalloc() */
kfree(bio);
}
/*
* Users of this function have their own bio allocation. Subsequently,
* they must remember to pair any call to bio_init() with bio_uninit()
* when IO has completed, or when the bio is released.
*/
void bio_init(struct bio *bio, struct bio_vec *table,
unsigned short max_vecs)
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
bio->bi_next = NULL;
bio->bi_bdev = NULL;
bio->bi_opf = 0;
bio->bi_flags = 0;
bio->bi_ioprio = 0;
bio->bi_write_hint = 0;
bio->bi_status = 0;
bio->bi_iter.bi_sector = 0;
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_idx = 0;
bio->bi_iter.bi_bvec_done = 0;
bio->bi_end_io = NULL;
bio->bi_private = NULL;
#ifdef CONFIG_BLK_CGROUP
bio->bi_blkg = NULL;
bio->bi_issue.value = 0;
#ifdef CONFIG_BLK_CGROUP_IOCOST
bio->bi_iocost_cost = 0;
#endif
#endif
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
bio->bi_crypt_context = NULL;
#endif
#ifdef CONFIG_BLK_DEV_INTEGRITY
bio->bi_integrity = NULL;
#endif
bio->bi_vcnt = 0;
atomic_set(&bio->__bi_remaining, 1);
atomic_set(&bio->__bi_cnt, 1);
bio->bi_io_vec = table;
bio->bi_pool = NULL;
EXPORT_SYMBOL(bio_init);
/**
* bio_reset - reinitialize a bio
* @bio: bio to reset
*
* Description:
* After calling bio_reset(), @bio will be in the same state as a freshly
* allocated bio returned bio bio_alloc_bioset() - the only fields that are
* preserved are the ones that are initialized by bio_alloc_bioset(). See
* comment in struct bio.
*/
void bio_reset(struct bio *bio)
{
bio_uninit(bio);
atomic_set(&bio->__bi_remaining, 1);
static struct bio *__bio_chain_endio(struct bio *bio)
struct bio *parent = bio->bi_private;
if (bio->bi_status && !parent->bi_status)
parent->bi_status = bio->bi_status;
return parent;
}
static void bio_chain_endio(struct bio *bio)
{
bio_endio(__bio_chain_endio(bio));
}
/**
* bio_chain - chain bio completions
* @parent: the parent bio of @bio
*
* The caller won't have a bi_end_io called when @bio completes - instead,
* @parent's bi_end_io won't be called until both @parent and @bio have
* completed; the chained bio will also be freed when it completes.
*
* The caller must not set bi_private or bi_end_io in @bio.
*/
void bio_chain(struct bio *bio, struct bio *parent)
{
BUG_ON(bio->bi_private || bio->bi_end_io);
bio->bi_private = parent;
bio->bi_end_io = bio_chain_endio;
bio_inc_remaining(parent);
static void bio_alloc_rescue(struct work_struct *work)
{
struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
struct bio *bio;
while (1) {
spin_lock(&bs->rescue_lock);
bio = bio_list_pop(&bs->rescue_list);
spin_unlock(&bs->rescue_lock);
if (!bio)
break;
submit_bio_noacct(bio);
}
}
static void punt_bios_to_rescuer(struct bio_set *bs)
{
struct bio_list punt, nopunt;
struct bio *bio;
if (WARN_ON_ONCE(!bs->rescue_workqueue))
return;
/*
* In order to guarantee forward progress we must punt only bios that
* were allocated from this bio_set; otherwise, if there was a bio on
* there for a stacking driver higher up in the stack, processing it
* could require allocating bios from this bio_set, and doing that from
* our own rescuer would be bad.
*
* Since bio lists are singly linked, pop them all instead of trying to
* remove from the middle of the list:
*/
bio_list_init(&punt);
bio_list_init(&nopunt);
while ((bio = bio_list_pop(¤t->bio_list[0])))
bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
current->bio_list[0] = nopunt;
bio_list_init(&nopunt);
while ((bio = bio_list_pop(¤t->bio_list[1])))
bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
current->bio_list[1] = nopunt;
spin_lock(&bs->rescue_lock);
bio_list_merge(&bs->rescue_list, &punt);
spin_unlock(&bs->rescue_lock);
queue_work(bs->rescue_workqueue, &bs->rescue_work);
}
* @gfp_mask: the GFP_* mask given to the slab allocator
* @bs: the bio_set to allocate from.
* Allocate a bio from the mempools in @bs.
* If %__GFP_DIRECT_RECLAIM is set then bio_alloc will always be able to
* allocate a bio. This is due to the mempool guarantees. To make this work,
* callers must never allocate more than 1 bio at a time from the general pool.
* Callers that need to allocate more than 1 bio must always submit the
* previously allocated bio for IO before attempting to allocate a new one.
* Failure to do so can cause deadlocks under memory pressure.
* Note that when running under submit_bio_noacct() (i.e. any block driver),
* bios are not submitted until after you return - see the code in
* submit_bio_noacct() that converts recursion into iteration, to prevent
* stack overflows.
* This would normally mean allocating multiple bios under submit_bio_noacct()
* would be susceptible to deadlocks, but we have
* deadlock avoidance code that resubmits any blocked bios from a rescuer
* thread.
* However, we do not guarantee forward progress for allocations from other
* mempools. Doing multiple allocations from the same mempool under
* submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
* for per bio allocations.
* Returns: Pointer to new bio on success, NULL on failure.
struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs,
struct bio_set *bs)
gfp_t saved_gfp = gfp_mask;
/* should not use nobvec bioset for nr_iovecs > 0 */
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_iovecs > 0))
return NULL;
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
/*
* submit_bio_noacct() converts recursion to iteration; this means if
* we're running beneath it, any bios we allocate and submit will not be
* submitted (and thus freed) until after we return.
*
* This exposes us to a potential deadlock if we allocate multiple bios
* from the same bio_set() while running underneath submit_bio_noacct().
* If we were to allocate multiple bios (say a stacking block driver
* that was splitting bios), we would deadlock if we exhausted the
* mempool's reserve.
*
* We solve this, and guarantee forward progress, with a rescuer
* workqueue per bio_set. If we go to allocate and there are bios on
* current->bio_list, we first try the allocation without
* __GFP_DIRECT_RECLAIM; if that fails, we punt those bios we would be
* blocking to the rescuer workqueue before we retry with the original
* gfp_flags.
*/
if (current->bio_list &&
(!bio_list_empty(¤t->bio_list[0]) ||
!bio_list_empty(¤t->bio_list[1])) &&
bs->rescue_workqueue)
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
p = mempool_alloc(&bs->bio_pool, gfp_mask);
if (!p && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
gfp_mask = saved_gfp;
p = mempool_alloc(&bs->bio_pool, gfp_mask);
bio = p + bs->front_pad;
if (nr_iovecs > BIO_INLINE_VECS) {
struct bio_vec *bvl = NULL;
bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
if (!bvl && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
gfp_mask = saved_gfp;
bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
bio_init(bio, bvl, nr_iovecs);
} else if (nr_iovecs) {
bio_init(bio, bio->bi_inline_vecs, BIO_INLINE_VECS);
} else {
bio_init(bio, NULL, 0);
bio->bi_pool = bs;
mempool_free(p, &bs->bio_pool);
EXPORT_SYMBOL(bio_alloc_bioset);
/**
* bio_kmalloc - kmalloc a bio for I/O
* @gfp_mask: the GFP_* mask given to the slab allocator
* @nr_iovecs: number of iovecs to pre-allocate
*
* Use kmalloc to allocate and initialize a bio.
*
* Returns: Pointer to new bio on success, NULL on failure.
*/
struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
{
struct bio *bio;
if (nr_iovecs > UIO_MAXIOV)
return NULL;
bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
if (unlikely(!bio))
return NULL;
bio_init(bio, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs);
bio->bi_pool = NULL;
return bio;
}
EXPORT_SYMBOL(bio_kmalloc);
void zero_fill_bio(struct bio *bio)
struct bio_vec bv;
struct bvec_iter iter;
bio_for_each_segment(bv, bio, iter) {
char *data = bvec_kmap_irq(&bv, &flags);
memset(data, 0, bv.bv_len);
flush_dcache_page(bv.bv_page);
/**
* bio_truncate - truncate the bio to small size of @new_size
* @bio: the bio to be truncated
* @new_size: new size for truncating the bio
*
* Description:
* Truncate the bio to new size of @new_size. If bio_op(bio) is
* REQ_OP_READ, zero the truncated part. This function should only
* be used for handling corner cases, such as bio eod.
*/
void bio_truncate(struct bio *bio, unsigned new_size)
{
struct bio_vec bv;
struct bvec_iter iter;
unsigned int done = 0;
bool truncated = false;
if (new_size >= bio->bi_iter.bi_size)
return;
if (bio_op(bio) != REQ_OP_READ)
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
goto exit;
bio_for_each_segment(bv, bio, iter) {
if (done + bv.bv_len > new_size) {
unsigned offset;
if (!truncated)
offset = new_size - done;
else
offset = 0;
zero_user(bv.bv_page, offset, bv.bv_len - offset);
truncated = true;
}
done += bv.bv_len;
}
exit:
/*
* Don't touch bvec table here and make it really immutable, since
* fs bio user has to retrieve all pages via bio_for_each_segment_all
* in its .end_bio() callback.
*
* It is enough to truncate bio by updating .bi_size since we can make
* correct bvec with the updated .bi_size for drivers.
*/
bio->bi_iter.bi_size = new_size;
}
/**
* guard_bio_eod - truncate a BIO to fit the block device
* @bio: bio to truncate
*
* This allows us to do IO even on the odd last sectors of a device, even if the
* block size is some multiple of the physical sector size.
*
* We'll just truncate the bio to the size of the device, and clear the end of
* the buffer head manually. Truly out-of-range accesses will turn into actual
* I/O errors, this only handles the "we need to be able to do I/O at the final
* sector" case.
*/
void guard_bio_eod(struct bio *bio)
{
sector_t maxsector = bdev_nr_sectors(bio->bi_bdev);
if (!maxsector)
return;
/*
* If the *whole* IO is past the end of the device,
* let it through, and the IO layer will turn it into
* an EIO.
*/
if (unlikely(bio->bi_iter.bi_sector >= maxsector))
return;
maxsector -= bio->bi_iter.bi_sector;
if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
return;
bio_truncate(bio, maxsector << 9);
}
/**
* bio_put - release a reference to a bio
* @bio: bio to release reference to
*
* Description:
* Put a reference to a &struct bio, either one you have gotten with
* bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
if (!bio_flagged(bio, BIO_REFFED))
else {
BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
/*
* last put frees it
*/
if (atomic_dec_and_test(&bio->__bi_cnt))
bio_free(bio);
}
EXPORT_SYMBOL(bio_put);
/**
* __bio_clone_fast - clone a bio that shares the original bio's biovec
* @bio: destination bio
* @bio_src: bio to clone
*
* Clone a &bio. Caller will own the returned bio, but not
* the actual data it points to. Reference count of returned
* bio will be one.
*
* Caller must ensure that @bio_src is not freed before @bio.
*/
void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
{
WARN_ON_ONCE(bio->bi_pool && bio->bi_max_vecs);
* most users will be overriding ->bi_bdev with a new target,
* so we don't set nor calculate new physical/hw segment counts here
*/
bio->bi_bdev = bio_src->bi_bdev;
bio_set_flag(bio, BIO_CLONED);
if (bio_flagged(bio_src, BIO_THROTTLED))
bio_set_flag(bio, BIO_THROTTLED);
if (bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter = bio_src->bi_iter;
bio->bi_io_vec = bio_src->bi_io_vec;
bio_clone_blkg_association(bio, bio_src);
blkcg_bio_issue_init(bio);
}
EXPORT_SYMBOL(__bio_clone_fast);
/**
* bio_clone_fast - clone a bio that shares the original bio's biovec
* @bio: bio to clone
* @gfp_mask: allocation priority
* @bs: bio_set to allocate from
*
* Like __bio_clone_fast, only also allocates the returned bio
*/
struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
{
struct bio *b;
b = bio_alloc_bioset(gfp_mask, 0, bs);
if (!b)
return NULL;
__bio_clone_fast(b, bio);
if (bio_crypt_clone(b, bio, gfp_mask) < 0)
goto err_put;
if (bio_integrity(bio) &&
bio_integrity_clone(b, bio, gfp_mask) < 0)
goto err_put;
err_put:
bio_put(b);
return NULL;
const char *bio_devname(struct bio *bio, char *buf)
{
return bdevname(bio->bi_bdev, buf);
}
EXPORT_SYMBOL(bio_devname);
static inline bool page_is_mergeable(const struct bio_vec *bv,
struct page *page, unsigned int len, unsigned int off,
Christoph Hellwig
committed
bool *same_page)
size_t bv_end = bv->bv_offset + bv->bv_len;
phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
phys_addr_t page_addr = page_to_phys(page);
if (vec_end_addr + 1 != page_addr + off)
return false;
if (xen_domain() && !xen_biovec_phys_mergeable(bv, page))
return false;
Christoph Hellwig
committed
*same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
if (*same_page)
return true;
return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
/*
* Try to merge a page into a segment, while obeying the hardware segment
* size limit. This is not for normal read/write bios, but for passthrough
* or Zone Append operations that we can't split.
*/
static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
struct page *page, unsigned len,
unsigned offset, bool *same_page)
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
unsigned long mask = queue_segment_boundary(q);
phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
if ((addr1 | mask) != (addr2 | mask))
return false;
if (bv->bv_len + len > queue_max_segment_size(q))
return false;
return __bio_try_merge_page(bio, page, len, offset, same_page);
* bio_add_hw_page - attempt to add a page to a bio with hw constraints
* @q: the target queue
* @bio: destination bio
* @page: page to add
* @len: vec entry length
* @offset: vec entry offset
* @max_sectors: maximum number of sectors that can be added
* @same_page: return if the segment has been merged inside the same page
* Add a page to a bio while respecting the hardware max_sectors, max_segment
* and gap limitations.
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page)
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
if (bio->bi_vcnt > 0) {
if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
/*
* If the queue doesn't support SG gaps and adding this segment
* would create a gap, disallow it.
*/
bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (bvec_gap_to_prev(q, bvec, offset))
return 0;
}
if (bio->bi_vcnt >= queue_max_segments(q))
Maurizio Lombardi
committed
bvec = &bio->bi_io_vec[bio->bi_vcnt];
bvec->bv_page = page;
bvec->bv_len = len;
bvec->bv_offset = offset;
bio->bi_vcnt++;
bio->bi_iter.bi_size += len;
/**
* bio_add_pc_page - attempt to add page to passthrough bio
* @q: the target queue
* @bio: destination bio
* @page: page to add
* @len: vec entry length
* @offset: vec entry offset
*
* Attempt to add a page to the bio_vec maplist. This can fail for a
* number of reasons, such as the bio being full or target block device
* limitations. The target block device must allow bio's up to PAGE_SIZE,
* so it is always possible to add a single page to an empty bio.
*
* This should only be used by passthrough bios.
*/
int bio_add_pc_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset)
{
bool same_page = false;
return bio_add_hw_page(q, bio, page, len, offset,
queue_max_hw_sectors(q), &same_page);
EXPORT_SYMBOL(bio_add_pc_page);
/**
* bio_add_zone_append_page - attempt to add page to zone-append bio
* @bio: destination bio
* @page: page to add
* @len: vec entry length
* @offset: vec entry offset
*
* Attempt to add a page to the bio_vec maplist of a bio that will be submitted
* for a zone-append request. This can fail for a number of reasons, such as the
* bio being full or the target block device is not a zoned block device or
* other limitations of the target block device. The target block device must
* allow bio's up to PAGE_SIZE, so it is always possible to add a single page
* to an empty bio.
*
* Returns: number of bytes added to the bio, or 0 in case of a failure.
*/
int bio_add_zone_append_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
bool same_page = false;
if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND))
return 0;
if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
return 0;
return bio_add_hw_page(q, bio, page, len, offset,
queue_max_zone_append_sectors(q), &same_page);
}
EXPORT_SYMBOL_GPL(bio_add_zone_append_page);
* __bio_try_merge_page - try appending data to an existing bvec.
* @bio: destination bio
* @page: start page to add
* @len: length of the data to add
* @off: offset of the data relative to @page
Christoph Hellwig
committed
* @same_page: return if the segment has been merged inside the same page
* Try to add the data at @page + @off to the last bvec of @bio. This is a
* useful optimisation for file systems with a block size smaller than the
* Warn if (@len, @off) crosses pages in case that @same_page is true.
*
* Return %true on success or %false on failure.
bool __bio_try_merge_page(struct bio *bio, struct page *page,
Christoph Hellwig
committed
unsigned int len, unsigned int off, bool *same_page)
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
if (bio->bi_vcnt > 0) {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (page_is_mergeable(bv, page, len, off, same_page)) {
if (bio->bi_iter.bi_size > UINT_MAX - len) {
*same_page = false;
}
bv->bv_len += len;
bio->bi_iter.bi_size += len;
return true;
}
return false;
}
EXPORT_SYMBOL_GPL(__bio_try_merge_page);
* __bio_add_page - add page(s) to a bio in a new segment
* @bio: destination bio
* @page: start page to add
* @len: length of the data to add, may cross pages
* @off: offset of the data relative to @page, may cross pages
*
* Add the data at @page + @off to @bio as a new bvec. The caller must ensure
* that @bio has space for another bvec.
*/
void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off)
{
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
bv->bv_page = page;
bv->bv_offset = off;
bv->bv_len = len;
if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
bio_set_flag(bio, BIO_WORKINGSET);
}
EXPORT_SYMBOL_GPL(__bio_add_page);
/**
* bio_add_page - attempt to add page(s) to bio
* @bio: destination bio
* @page: start page to add
* @len: vec entry length, may cross pages
* @offset: vec entry offset relative to @page, may cross pages
* Attempt to add page(s) to the bio_vec maplist. This will only fail
* if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
*/
int bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
Christoph Hellwig
committed
bool same_page = false;
if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
return 0;
__bio_add_page(bio, page, len, offset);
}
EXPORT_SYMBOL(bio_add_page);
void bio_release_pages(struct bio *bio, bool mark_dirty)
{
struct bvec_iter_all iter_all;
struct bio_vec *bvec;
if (bio_flagged(bio, BIO_NO_PAGE_REF))
return;
bio_for_each_segment_all(bvec, bio, iter_all) {
if (mark_dirty && !PageCompound(bvec->bv_page))
set_page_dirty_lock(bvec->bv_page);
put_page(bvec->bv_page);
}
EXPORT_SYMBOL_GPL(bio_release_pages);
static void __bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
WARN_ON_ONCE(bio->bi_max_vecs);
bio->bi_vcnt = iter->nr_segs;
bio->bi_io_vec = (struct bio_vec *)iter->bvec;
bio->bi_iter.bi_bvec_done = iter->iov_offset;
bio->bi_iter.bi_size = iter->count;
bio_set_flag(bio, BIO_NO_PAGE_REF);
bio_set_flag(bio, BIO_CLONED);
static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
{
__bio_iov_bvec_set(bio, iter);
iov_iter_advance(iter, iter->count);
static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter)
{