Newer
Older
// SPDX-License-Identifier: GPL-2.0
* Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
*/
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/sched/sysctl.h>
#include <linux/blk-crypto.h>
#include <trace/events/block.h>
static struct biovec_slab {
int nr_vecs;
char *name;
struct kmem_cache *slab;
} bvec_slabs[] __read_mostly = {
{ .nr_vecs = 16, .name = "biovec-16" },
{ .nr_vecs = 64, .name = "biovec-64" },
{ .nr_vecs = 128, .name = "biovec-128" },
{ .nr_vecs = BIO_MAX_VECS, .name = "biovec-max" },
static struct biovec_slab *biovec_slab(unsigned short nr_vecs)
{
switch (nr_vecs) {
/* smaller bios use inline vecs */
case 5 ... 16:
return &bvec_slabs[0];
case 17 ... 64:
return &bvec_slabs[1];
case 65 ... 128:
return &bvec_slabs[2];
case 129 ... BIO_MAX_VECS:
return &bvec_slabs[3];
default:
BUG();
return NULL;
}
}
/*
* fs_bio_set is the bio_set containing bio and iovec memory pools used by
* IO code that does not need private memory pools.
*/
EXPORT_SYMBOL(fs_bio_set);
/*
* Our slab pool management
*/
struct bio_slab {
struct kmem_cache *slab;
unsigned int slab_ref;
unsigned int slab_size;
char name[8];
};
static DEFINE_MUTEX(bio_slab_lock);
static struct bio_slab *create_bio_slab(unsigned int size)
struct bio_slab *bslab = kzalloc(sizeof(*bslab), GFP_KERNEL);
snprintf(bslab->name, sizeof(bslab->name), "bio-%d", size);
bslab->slab = kmem_cache_create(bslab->name, size,
ARCH_KMALLOC_MINALIGN,
SLAB_HWCACHE_ALIGN | SLAB_TYPESAFE_BY_RCU, NULL);
if (!bslab->slab)
goto fail_alloc_slab;
bslab->slab_ref = 1;
bslab->slab_size = size;
if (!xa_err(xa_store(&bio_slabs, size, bslab, GFP_KERNEL)))
return bslab;
fail_alloc_slab:
kfree(bslab);
return NULL;
}
static inline unsigned int bs_bio_slab_size(struct bio_set *bs)
{
return bs->front_pad + sizeof(struct bio) + bs->back_pad;
static struct kmem_cache *bio_find_or_create_slab(struct bio_set *bs)
{
unsigned int size = bs_bio_slab_size(bs);
struct bio_slab *bslab;
mutex_lock(&bio_slab_lock);
bslab = xa_load(&bio_slabs, size);
if (bslab)
bslab->slab_ref++;
else
bslab = create_bio_slab(size);
if (bslab)
return bslab->slab;
return NULL;
}
static void bio_put_slab(struct bio_set *bs)
{
struct bio_slab *bslab = NULL;
unsigned int slab_size = bs_bio_slab_size(bs);
mutex_lock(&bio_slab_lock);
bslab = xa_load(&bio_slabs, slab_size);
if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
goto out;
WARN_ON_ONCE(bslab->slab != bs->bio_slab);
WARN_ON(!bslab->slab_ref);
if (--bslab->slab_ref)
goto out;
kmem_cache_destroy(bslab->slab);
out:
mutex_unlock(&bio_slab_lock);
}
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs)
if (nr_vecs == BIO_MAX_VECS)
else if (nr_vecs > BIO_INLINE_VECS)
kmem_cache_free(biovec_slab(nr_vecs)->slab, bv);
/*
* Make the first allocation restricted and don't dump info on allocation
* failures, since we'll fall back to the mempool in case of failure.
*/
static inline gfp_t bvec_alloc_gfp(gfp_t gfp)
{
return (gfp & ~(__GFP_DIRECT_RECLAIM | __GFP_IO)) |
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
gfp_t gfp_mask)
struct biovec_slab *bvs = biovec_slab(*nr_vecs);
if (WARN_ON_ONCE(!bvs))
return NULL;
/*
* Upgrade the nr_vecs request to take full advantage of the allocation.
* We also rely on this in the bvec_free path.
*nr_vecs = bvs->nr_vecs;
* Try a slab allocation first for all smaller allocations. If that
* fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
* The mempool is sized to handle up to BIO_MAX_VECS entries.
if (*nr_vecs < BIO_MAX_VECS) {
bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
*nr_vecs = BIO_MAX_VECS;
void bio_uninit(struct bio *bio)
#ifdef CONFIG_BLK_CGROUP
if (bio->bi_blkg) {
blkg_put(bio->bi_blkg);
bio->bi_blkg = NULL;
}
#endif
if (bio_integrity(bio))
bio_integrity_free(bio);
bio_crypt_free_ctx(bio);
EXPORT_SYMBOL(bio_uninit);
static void bio_free(struct bio *bio)
{
struct bio_set *bs = bio->bi_pool;
void *p;
bio_uninit(bio);
bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
/*
* If we have front padding, adjust the bio pointer before freeing
*/
p = bio;
mempool_free(p, &bs->bio_pool);
} else {
/* Bio was allocated by bio_kmalloc() */
kfree(bio);
}
/*
* Users of this function have their own bio allocation. Subsequently,
* they must remember to pair any call to bio_init() with bio_uninit()
* when IO has completed, or when the bio is released.
*/
void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
unsigned short max_vecs, unsigned int opf)
bio->bi_bdev = bdev;
bio->bi_opf = opf;
bio->bi_flags = 0;
bio->bi_ioprio = 0;
bio->bi_status = 0;
bio->bi_iter.bi_sector = 0;
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_idx = 0;
bio->bi_iter.bi_bvec_done = 0;
bio->bi_end_io = NULL;
bio->bi_private = NULL;
#ifdef CONFIG_BLK_CGROUP
bio->bi_blkg = NULL;
bio->bi_issue.value = 0;
if (bdev)
bio_associate_blkg(bio);
#ifdef CONFIG_BLK_CGROUP_IOCOST
bio->bi_iocost_cost = 0;
#endif
#endif
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
bio->bi_crypt_context = NULL;
#endif
#ifdef CONFIG_BLK_DEV_INTEGRITY
bio->bi_integrity = NULL;
#endif
bio->bi_vcnt = 0;
atomic_set(&bio->__bi_remaining, 1);
atomic_set(&bio->__bi_cnt, 1);
bio->bi_cookie = BLK_QC_T_NONE;
bio->bi_io_vec = table;
bio->bi_pool = NULL;
EXPORT_SYMBOL(bio_init);
/**
* bio_reset - reinitialize a bio
* @bio: bio to reset
* @bdev: block device to use the bio for
* @opf: operation and flags for bio
*
* Description:
* After calling bio_reset(), @bio will be in the same state as a freshly
* allocated bio returned bio bio_alloc_bioset() - the only fields that are
* preserved are the ones that are initialized by bio_alloc_bioset(). See
* comment in struct bio.
*/
void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf)
bio_uninit(bio);
atomic_set(&bio->__bi_remaining, 1);
bio->bi_bdev = bdev;
if (bio->bi_bdev)
bio_associate_blkg(bio);
static struct bio *__bio_chain_endio(struct bio *bio)
struct bio *parent = bio->bi_private;
if (bio->bi_status && !parent->bi_status)
parent->bi_status = bio->bi_status;
return parent;
}
static void bio_chain_endio(struct bio *bio)
{
bio_endio(__bio_chain_endio(bio));
}
/**
* bio_chain - chain bio completions
* @parent: the parent bio of @bio
*
* The caller won't have a bi_end_io called when @bio completes - instead,
* @parent's bi_end_io won't be called until both @parent and @bio have
* completed; the chained bio will also be freed when it completes.
*
* The caller must not set bi_private or bi_end_io in @bio.
*/
void bio_chain(struct bio *bio, struct bio *parent)
{
BUG_ON(bio->bi_private || bio->bi_end_io);
bio->bi_private = parent;
bio->bi_end_io = bio_chain_endio;
bio_inc_remaining(parent);
struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
unsigned int nr_pages, unsigned int opf, gfp_t gfp)
struct bio *new = bio_alloc(bdev, nr_pages, opf, gfp);
if (bio) {
bio_chain(bio, new);
submit_bio(bio);
}
return new;
}
EXPORT_SYMBOL_GPL(blk_next_bio);
static void bio_alloc_rescue(struct work_struct *work)
{
struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
struct bio *bio;
while (1) {
spin_lock(&bs->rescue_lock);
bio = bio_list_pop(&bs->rescue_list);
spin_unlock(&bs->rescue_lock);
if (!bio)
break;
submit_bio_noacct(bio);
}
}
static void punt_bios_to_rescuer(struct bio_set *bs)
{
struct bio_list punt, nopunt;
struct bio *bio;
if (WARN_ON_ONCE(!bs->rescue_workqueue))
return;
/*
* In order to guarantee forward progress we must punt only bios that
* were allocated from this bio_set; otherwise, if there was a bio on
* there for a stacking driver higher up in the stack, processing it
* could require allocating bios from this bio_set, and doing that from
* our own rescuer would be bad.
*
* Since bio lists are singly linked, pop them all instead of trying to
* remove from the middle of the list:
*/
bio_list_init(&punt);
bio_list_init(&nopunt);
while ((bio = bio_list_pop(¤t->bio_list[0])))
bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
current->bio_list[0] = nopunt;
bio_list_init(&nopunt);
while ((bio = bio_list_pop(¤t->bio_list[1])))
bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
current->bio_list[1] = nopunt;
spin_lock(&bs->rescue_lock);
bio_list_merge(&bs->rescue_list, &punt);
spin_unlock(&bs->rescue_lock);
queue_work(bs->rescue_workqueue, &bs->rescue_work);
}
* @bdev: block device to allocate the bio for (can be %NULL)
* @nr_vecs: number of bvecs to pre-allocate
* @opf: operation and flags for bio
* @gfp_mask: the GFP_* mask given to the slab allocator
* @bs: the bio_set to allocate from.
* Allocate a bio from the mempools in @bs.
* If %__GFP_DIRECT_RECLAIM is set then bio_alloc will always be able to
* allocate a bio. This is due to the mempool guarantees. To make this work,
* callers must never allocate more than 1 bio at a time from the general pool.
* Callers that need to allocate more than 1 bio must always submit the
* previously allocated bio for IO before attempting to allocate a new one.
* Failure to do so can cause deadlocks under memory pressure.
* Note that when running under submit_bio_noacct() (i.e. any block driver),
* bios are not submitted until after you return - see the code in
* submit_bio_noacct() that converts recursion into iteration, to prevent
* stack overflows.
* This would normally mean allocating multiple bios under submit_bio_noacct()
* would be susceptible to deadlocks, but we have
* deadlock avoidance code that resubmits any blocked bios from a rescuer
* thread.
* However, we do not guarantee forward progress for allocations from other
* mempools. Doing multiple allocations from the same mempool under
* submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
* for per bio allocations.
* Returns: Pointer to new bio on success, NULL on failure.
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
unsigned int opf, gfp_t gfp_mask,
struct bio_set *bs)
gfp_t saved_gfp = gfp_mask;
/* should not use nobvec bioset for nr_vecs > 0 */
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
/*
* submit_bio_noacct() converts recursion to iteration; this means if
* we're running beneath it, any bios we allocate and submit will not be
* submitted (and thus freed) until after we return.
*
* This exposes us to a potential deadlock if we allocate multiple bios
* from the same bio_set() while running underneath submit_bio_noacct().
* If we were to allocate multiple bios (say a stacking block driver
* that was splitting bios), we would deadlock if we exhausted the
* mempool's reserve.
*
* We solve this, and guarantee forward progress, with a rescuer
* workqueue per bio_set. If we go to allocate and there are bios on
* current->bio_list, we first try the allocation without
* __GFP_DIRECT_RECLAIM; if that fails, we punt those bios we would be
* blocking to the rescuer workqueue before we retry with the original
* gfp_flags.
*/
if (current->bio_list &&
(!bio_list_empty(¤t->bio_list[0]) ||
!bio_list_empty(¤t->bio_list[1])) &&
bs->rescue_workqueue)
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
p = mempool_alloc(&bs->bio_pool, gfp_mask);
if (!p && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
gfp_mask = saved_gfp;
p = mempool_alloc(&bs->bio_pool, gfp_mask);
bio = p + bs->front_pad;
if (nr_vecs > BIO_INLINE_VECS) {
struct bio_vec *bvl = NULL;
bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
if (!bvl && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
gfp_mask = saved_gfp;
bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
bio_init(bio, bdev, bvl, nr_vecs, opf);
} else if (nr_vecs) {
bio_init(bio, bdev, bio->bi_inline_vecs, BIO_INLINE_VECS, opf);
bio_init(bio, bdev, NULL, 0, opf);
bio->bi_pool = bs;
mempool_free(p, &bs->bio_pool);
EXPORT_SYMBOL(bio_alloc_bioset);
/**
* bio_kmalloc - kmalloc a bio for I/O
* @gfp_mask: the GFP_* mask given to the slab allocator
* @nr_iovecs: number of iovecs to pre-allocate
*
* Use kmalloc to allocate and initialize a bio.
*
* Returns: Pointer to new bio on success, NULL on failure.
*/
struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
{
struct bio *bio;
if (nr_iovecs > UIO_MAXIOV)
return NULL;
bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
if (unlikely(!bio))
return NULL;
bio_init(bio, NULL, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs,
0);
bio->bi_pool = NULL;
return bio;
}
EXPORT_SYMBOL(bio_kmalloc);
void zero_fill_bio(struct bio *bio)
struct bio_vec bv;
struct bvec_iter iter;
bio_for_each_segment(bv, bio, iter)
memzero_bvec(&bv);
/**
* bio_truncate - truncate the bio to small size of @new_size
* @bio: the bio to be truncated
* @new_size: new size for truncating the bio
*
* Description:
* Truncate the bio to new size of @new_size. If bio_op(bio) is
* REQ_OP_READ, zero the truncated part. This function should only
* be used for handling corner cases, such as bio eod.
*/
static void bio_truncate(struct bio *bio, unsigned new_size)
{
struct bio_vec bv;
struct bvec_iter iter;
unsigned int done = 0;
bool truncated = false;
if (new_size >= bio->bi_iter.bi_size)
return;
if (bio_op(bio) != REQ_OP_READ)
goto exit;
bio_for_each_segment(bv, bio, iter) {
if (done + bv.bv_len > new_size) {
unsigned offset;
if (!truncated)
offset = new_size - done;
else
offset = 0;
zero_user(bv.bv_page, bv.bv_offset + offset,
bv.bv_len - offset);
truncated = true;
}
done += bv.bv_len;
}
exit:
/*
* Don't touch bvec table here and make it really immutable, since
* fs bio user has to retrieve all pages via bio_for_each_segment_all
* in its .end_bio() callback.
*
* It is enough to truncate bio by updating .bi_size since we can make
* correct bvec with the updated .bi_size for drivers.
*/
bio->bi_iter.bi_size = new_size;
}
/**
* guard_bio_eod - truncate a BIO to fit the block device
* @bio: bio to truncate
*
* This allows us to do IO even on the odd last sectors of a device, even if the
* block size is some multiple of the physical sector size.
*
* We'll just truncate the bio to the size of the device, and clear the end of
* the buffer head manually. Truly out-of-range accesses will turn into actual
* I/O errors, this only handles the "we need to be able to do I/O at the final
* sector" case.
*/
void guard_bio_eod(struct bio *bio)
{
sector_t maxsector = bdev_nr_sectors(bio->bi_bdev);
if (!maxsector)
return;
/*
* If the *whole* IO is past the end of the device,
* let it through, and the IO layer will turn it into
* an EIO.
*/
if (unlikely(bio->bi_iter.bi_sector >= maxsector))
return;
maxsector -= bio->bi_iter.bi_sector;
if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
return;
bio_truncate(bio, maxsector << 9);
}
#define ALLOC_CACHE_MAX 512
#define ALLOC_CACHE_SLACK 64
static void bio_alloc_cache_prune(struct bio_alloc_cache *cache,
unsigned int nr)
{
unsigned int i = 0;
struct bio *bio;
while ((bio = cache->free_list) != NULL) {
cache->free_list = bio->bi_next;
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
cache->nr--;
bio_free(bio);
if (++i == nr)
break;
}
}
static int bio_cpu_dead(unsigned int cpu, struct hlist_node *node)
{
struct bio_set *bs;
bs = hlist_entry_safe(node, struct bio_set, cpuhp_dead);
if (bs->cache) {
struct bio_alloc_cache *cache = per_cpu_ptr(bs->cache, cpu);
bio_alloc_cache_prune(cache, -1U);
}
return 0;
}
static void bio_alloc_cache_destroy(struct bio_set *bs)
{
int cpu;
if (!bs->cache)
return;
cpuhp_state_remove_instance_nocalls(CPUHP_BIO_DEAD, &bs->cpuhp_dead);
for_each_possible_cpu(cpu) {
struct bio_alloc_cache *cache;
cache = per_cpu_ptr(bs->cache, cpu);
bio_alloc_cache_prune(cache, -1U);
}
free_percpu(bs->cache);
}
/**
* bio_put - release a reference to a bio
* @bio: bio to release reference to
*
* Description:
* Put a reference to a &struct bio, either one you have gotten with
* bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
if (unlikely(bio_flagged(bio, BIO_REFFED))) {
if (!atomic_dec_and_test(&bio->__bi_cnt))
return;
}
if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
struct bio_alloc_cache *cache;
bio_uninit(bio);
cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
bio->bi_next = cache->free_list;
cache->free_list = bio;
if (++cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)
bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK);
put_cpu();
} else {
bio_free(bio);
EXPORT_SYMBOL(bio_put);
static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
bio_set_flag(bio, BIO_CLONED);
if (bio_flagged(bio_src, BIO_THROTTLED))
bio_set_flag(bio, BIO_THROTTLED);
if (bio->bi_bdev == bio_src->bi_bdev &&
bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
bio->bi_ioprio = bio_src->bi_ioprio;
bio_clone_blkg_association(bio, bio_src);
blkcg_bio_issue_init(bio);
if (bio_crypt_clone(bio, bio_src, gfp) < 0)
return -ENOMEM;
if (bio_integrity(bio_src) &&
bio_integrity_clone(bio, bio_src, gfp) < 0)
return -ENOMEM;
return 0;
* bio_alloc_clone - clone a bio that shares the original bio's biovec
* @bdev: block_device to clone onto
* @bio_src: bio to clone from
* @gfp: allocation priority
* @bs: bio_set to allocate from
* Allocate a new bio that is a clone of @bio_src. The caller owns the returned
* bio, but not the actual data it points to.
*
* The caller must ensure that the return bio is not freed before @bio_src.
struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
gfp_t gfp, struct bio_set *bs)
struct bio *bio;
bio = bio_alloc_bioset(bdev, 0, bio_src->bi_opf, gfp, bs);
if (__bio_clone(bio, bio_src, gfp) < 0) {
bio_put(bio);
return NULL;
}
bio->bi_io_vec = bio_src->bi_io_vec;
EXPORT_SYMBOL(bio_alloc_clone);
* bio_init_clone - clone a bio that shares the original bio's biovec
* @bdev: block_device to clone onto
* @bio: bio to clone into
* @bio_src: bio to clone from
* @gfp: allocation priority
*
* Initialize a new bio in caller provided memory that is a clone of @bio_src.
* The caller owns the returned bio, but not the actual data it points to.
*
* The caller must ensure that @bio_src is not freed before @bio.
*/
int bio_init_clone(struct block_device *bdev, struct bio *bio,
struct bio *bio_src, gfp_t gfp)
{
int ret;
bio_init(bio, bdev, bio_src->bi_io_vec, 0, bio_src->bi_opf);
ret = __bio_clone(bio, bio_src, gfp);
if (ret)
bio_uninit(bio);
return ret;
}
EXPORT_SYMBOL(bio_init_clone);
/**
* bio_full - check if the bio is full
* @bio: bio to check
* @len: length of one segment to be added
*
* Return true if @bio is full and one segment with @len bytes can't be
* added to the bio, otherwise return false
*/
static inline bool bio_full(struct bio *bio, unsigned len)
{
if (bio->bi_vcnt >= bio->bi_max_vecs)
return true;
if (bio->bi_iter.bi_size > UINT_MAX - len)
return true;
return false;
}
static inline bool page_is_mergeable(const struct bio_vec *bv,
struct page *page, unsigned int len, unsigned int off,
Christoph Hellwig
committed
bool *same_page)
size_t bv_end = bv->bv_offset + bv->bv_len;
phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
phys_addr_t page_addr = page_to_phys(page);
if (vec_end_addr + 1 != page_addr + off)
return false;
if (xen_domain() && !xen_biovec_phys_mergeable(bv, page))
return false;
Christoph Hellwig
committed
*same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
if (*same_page)
return true;
return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
/**
* __bio_try_merge_page - try appending data to an existing bvec.
* @bio: destination bio
* @page: start page to add
* @len: length of the data to add
* @off: offset of the data relative to @page
* @same_page: return if the segment has been merged inside the same page
*
* Try to add the data at @page + @off to the last bvec of @bio. This is a
* useful optimisation for file systems with a block size smaller than the
* page size.
*
* Warn if (@len, @off) crosses pages in case that @same_page is true.
*
* Return %true on success or %false on failure.
*/
static bool __bio_try_merge_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off, bool *same_page)
{
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return false;
if (bio->bi_vcnt > 0) {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (page_is_mergeable(bv, page, len, off, same_page)) {
if (bio->bi_iter.bi_size > UINT_MAX - len) {
*same_page = false;
return false;
}
bv->bv_len += len;
bio->bi_iter.bi_size += len;
return true;
}
}
return false;
}
/*
* Try to merge a page into a segment, while obeying the hardware segment
* size limit. This is not for normal read/write bios, but for passthrough
* or Zone Append operations that we can't split.
*/
static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
struct page *page, unsigned len,
unsigned offset, bool *same_page)
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
unsigned long mask = queue_segment_boundary(q);
phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
if ((addr1 | mask) != (addr2 | mask))
return false;
if (bv->bv_len + len > queue_max_segment_size(q))
return false;
return __bio_try_merge_page(bio, page, len, offset, same_page);
* bio_add_hw_page - attempt to add a page to a bio with hw constraints
* @q: the target queue
* @bio: destination bio
* @page: page to add
* @len: vec entry length
* @offset: vec entry offset
* @max_sectors: maximum number of sectors that can be added
* @same_page: return if the segment has been merged inside the same page
* Add a page to a bio while respecting the hardware max_sectors, max_segment
* and gap limitations.
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page)
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
if (bio->bi_vcnt > 0) {
if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
/*
* If the queue doesn't support SG gaps and adding this segment
* would create a gap, disallow it.
*/
bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (bvec_gap_to_prev(q, bvec, offset))
return 0;
}
if (bio->bi_vcnt >= queue_max_segments(q))
Maurizio Lombardi
committed
bvec = &bio->bi_io_vec[bio->bi_vcnt];
bvec->bv_page = page;
bvec->bv_len = len;
bvec->bv_offset = offset;
bio->bi_vcnt++;
bio->bi_iter.bi_size += len;
/**
* bio_add_pc_page - attempt to add page to passthrough bio
* @q: the target queue
* @bio: destination bio
* @page: page to add
* @len: vec entry length
* @offset: vec entry offset
*
* Attempt to add a page to the bio_vec maplist. This can fail for a
* number of reasons, such as the bio being full or target block device
* limitations. The target block device must allow bio's up to PAGE_SIZE,
* so it is always possible to add a single page to an empty bio.
*
* This should only be used by passthrough bios.
*/
int bio_add_pc_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset)
{
bool same_page = false;
return bio_add_hw_page(q, bio, page, len, offset,
queue_max_hw_sectors(q), &same_page);
EXPORT_SYMBOL(bio_add_pc_page);
/**
* bio_add_zone_append_page - attempt to add page to zone-append bio
* @bio: destination bio
* @page: page to add
* @len: vec entry length
* @offset: vec entry offset
*
* Attempt to add a page to the bio_vec maplist of a bio that will be submitted
* for a zone-append request. This can fail for a number of reasons, such as the
* bio being full or the target block device is not a zoned block device or
* other limitations of the target block device. The target block device must
* allow bio's up to PAGE_SIZE, so it is always possible to add a single page
* to an empty bio.
*
* Returns: number of bytes added to the bio, or 0 in case of a failure.
*/
int bio_add_zone_append_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);