diff options
Diffstat (limited to 'block/bio.c')
-rw-r--r-- | block/bio.c | 256 |
1 files changed, 154 insertions, 102 deletions
diff --git a/block/bio.c b/block/bio.c index 41173710430c..4db1008309ed 100644 --- a/block/bio.c +++ b/block/bio.c @@ -244,7 +244,7 @@ fallback: void bio_uninit(struct bio *bio) { - bio_disassociate_task(bio); + bio_disassociate_blkg(bio); } EXPORT_SYMBOL(bio_uninit); @@ -571,14 +571,13 @@ void bio_put(struct bio *bio) } EXPORT_SYMBOL(bio_put); -inline int bio_phys_segments(struct request_queue *q, struct bio *bio) +int bio_phys_segments(struct request_queue *q, struct bio *bio) { if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) blk_recount_segments(q, bio); return bio->bi_phys_segments; } -EXPORT_SYMBOL(bio_phys_segments); /** * __bio_clone_fast - clone a bio that shares the original bio's biovec @@ -605,11 +604,13 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) if (bio_flagged(bio_src, BIO_THROTTLED)) bio_set_flag(bio, BIO_THROTTLED); bio->bi_opf = bio_src->bi_opf; + bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter = bio_src->bi_iter; bio->bi_io_vec = bio_src->bi_io_vec; - bio_clone_blkcg_association(bio, bio_src); + bio_clone_blkg_association(bio, bio_src); + blkcg_bio_issue_init(bio); } EXPORT_SYMBOL(__bio_clone_fast); @@ -729,7 +730,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page } /* If we may be able to merge these biovecs, force a recount */ - if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) + if (bio->bi_vcnt > 1 && biovec_phys_mergeable(q, bvec - 1, bvec)) bio_clear_flag(bio, BIO_SEG_VALID); done: @@ -827,6 +828,8 @@ int bio_add_page(struct bio *bio, struct page *page, } EXPORT_SYMBOL(bio_add_page); +#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) + /** * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio * @bio: bio to add pages to @@ -839,38 +842,35 @@ EXPORT_SYMBOL(bio_add_page); */ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) { - unsigned short nr_pages = 
bio->bi_max_vecs - bio->bi_vcnt, idx; + unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; + unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; + ssize_t size, left; + unsigned len, i; size_t offset; - ssize_t size; + + /* + * Move page array up in the allocated memory for the bio vecs as far as + * possible so that we can start filling biovecs from the beginning + * without overwriting the temporary page array. + */ + BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); + pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); if (unlikely(size <= 0)) return size ? size : -EFAULT; - idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE; - /* - * Deep magic below: We need to walk the pinned pages backwards - * because we are abusing the space allocated for the bio_vecs - * for the page array. Because the bio_vecs are larger than the - * page pointers by definition this will always work. But it also - * means we can't use bio_add_page, so any changes to it's semantics - * need to be reflected here as well. 
- */ - bio->bi_iter.bi_size += size; - bio->bi_vcnt += nr_pages; + for (left = size, i = 0; left > 0; left -= len, i++) { + struct page *page = pages[i]; - while (idx--) { - bv[idx].bv_page = pages[idx]; - bv[idx].bv_len = PAGE_SIZE; - bv[idx].bv_offset = 0; + len = min_t(size_t, PAGE_SIZE - offset, left); + if (WARN_ON_ONCE(bio_add_page(bio, page, len, offset) != len)) + return -EINVAL; + offset = 0; } - bv[0].bv_offset += offset; - bv[0].bv_len -= offset; - bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size; - iov_iter_advance(iter, size); return 0; } @@ -901,7 +901,6 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) return 0; } -EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages); static void submit_bio_wait_endio(struct bio *bio) { @@ -1255,13 +1254,14 @@ struct bio *bio_copy_user_iov(struct request_queue *q, /* * success */ - if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) || + if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) || (map_data && map_data->from_user)) { ret = bio_copy_from_iter(bio, iter); if (ret) goto cleanup; } else { - zero_fill_bio(bio); + if (bmd->is_our_pages) + zero_fill_bio(bio); iov_iter_advance(iter, bio->bi_iter.bi_size); } @@ -1591,7 +1591,6 @@ void bio_set_pages_dirty(struct bio *bio) set_page_dirty_lock(bvec->bv_page); } } -EXPORT_SYMBOL_GPL(bio_set_pages_dirty); static void bio_release_pages(struct bio *bio) { @@ -1661,17 +1660,33 @@ defer: spin_unlock_irqrestore(&bio_dirty_lock, flags); schedule_work(&bio_dirty_work); } -EXPORT_SYMBOL_GPL(bio_check_pages_dirty); + +void update_io_ticks(struct hd_struct *part, unsigned long now) +{ + unsigned long stamp; +again: + stamp = READ_ONCE(part->stamp); + if (unlikely(stamp != now)) { + if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) { + __part_stat_add(part, io_ticks, 1); + } + } + if (part->partno) { + part = &part_to_disk(part)->part0; + goto again; + } +} void generic_start_io_acct(struct request_queue *q, int 
op, unsigned long sectors, struct hd_struct *part) { const int sgrp = op_stat_group(op); - int cpu = part_stat_lock(); - part_round_stats(q, cpu, part); - part_stat_inc(cpu, part, ios[sgrp]); - part_stat_add(cpu, part, sectors[sgrp], sectors); + part_stat_lock(); + + update_io_ticks(part, jiffies); + part_stat_inc(part, ios[sgrp]); + part_stat_add(part, sectors[sgrp], sectors); part_inc_in_flight(q, part, op_is_write(op)); part_stat_unlock(); @@ -1681,12 +1696,15 @@ EXPORT_SYMBOL(generic_start_io_acct); void generic_end_io_acct(struct request_queue *q, int req_op, struct hd_struct *part, unsigned long start_time) { - unsigned long duration = jiffies - start_time; + unsigned long now = jiffies; + unsigned long duration = now - start_time; const int sgrp = op_stat_group(req_op); - int cpu = part_stat_lock(); - part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration)); - part_round_stats(q, cpu, part); + part_stat_lock(); + + update_io_ticks(part, now); + part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration)); + part_stat_add(part, time_in_queue, duration); part_dec_in_flight(q, part, op_is_write(req_op)); part_stat_unlock(); @@ -1808,7 +1826,6 @@ struct bio *bio_split(struct bio *bio, int sectors, bio_integrity_trim(split); bio_advance(bio, split->bi_iter.bi_size); - bio->bi_iter.bi_done = 0; if (bio_flagged(bio, BIO_TRACE_COMPLETION)) bio_set_flag(split, BIO_TRACE_COMPLETION); @@ -1957,102 +1974,137 @@ EXPORT_SYMBOL(bioset_init_from_src); #ifdef CONFIG_BLK_CGROUP -#ifdef CONFIG_MEMCG /** - * bio_associate_blkcg_from_page - associate a bio with the page's blkcg + * bio_disassociate_blkg - puts back the blkg reference if associated * @bio: target bio - * @page: the page to lookup the blkcg from * - * Associate @bio with the blkcg from @page's owning memcg. This works like - * every other associate function wrt references. + * Helper to disassociate the blkg from @bio if a blkg is associated. 
*/ -int bio_associate_blkcg_from_page(struct bio *bio, struct page *page) +void bio_disassociate_blkg(struct bio *bio) { - struct cgroup_subsys_state *blkcg_css; - - if (unlikely(bio->bi_css)) - return -EBUSY; - if (!page->mem_cgroup) - return 0; - blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, - &io_cgrp_subsys); - bio->bi_css = blkcg_css; - return 0; + if (bio->bi_blkg) { + blkg_put(bio->bi_blkg); + bio->bi_blkg = NULL; + } } -#endif /* CONFIG_MEMCG */ +EXPORT_SYMBOL_GPL(bio_disassociate_blkg); /** - * bio_associate_blkcg - associate a bio with the specified blkcg + * __bio_associate_blkg - associate a bio with a blkg * @bio: target bio - * @blkcg_css: css of the blkcg to associate + * @blkg: the blkg to associate * - * Associate @bio with the blkcg specified by @blkcg_css. Block layer will - * treat @bio as if it were issued by a task which belongs to the blkcg. + * This tries to associate @bio with the specified @blkg. Association failure + * is handled by walking up the blkg tree. Therefore, the blkg associated can + * be anything between @blkg and the root_blkg. This situation only happens + * when a cgroup is dying and then the remaining bios will spill to the closest + * alive blkg. * - * This function takes an extra reference of @blkcg_css which will be put - * when @bio is released. The caller must own @bio and is responsible for - * synchronizing calls to this function. + * A reference will be taken on the @blkg and will be released when @bio is + * freed. 
*/ -int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) +static void __bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) { - if (unlikely(bio->bi_css)) - return -EBUSY; - css_get(blkcg_css); - bio->bi_css = blkcg_css; - return 0; + bio_disassociate_blkg(bio); + + bio->bi_blkg = blkg_tryget_closest(blkg); } -EXPORT_SYMBOL_GPL(bio_associate_blkcg); /** - * bio_associate_blkg - associate a bio with the specified blkg + * bio_associate_blkg_from_css - associate a bio with a specified css * @bio: target bio - * @blkg: the blkg to associate + * @css: target css * - * Associate @bio with the blkg specified by @blkg. This is the queue specific - * blkcg information associated with the @bio, a reference will be taken on the - * @blkg and will be freed when the bio is freed. + * Associate @bio with the blkg found by combining the css's blkg and the + * request_queue of the @bio. This falls back to the queue's root_blkg if + * the association fails with the css. */ -int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) +void bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) { - if (unlikely(bio->bi_blkg)) - return -EBUSY; - if (!blkg_try_get(blkg)) - return -ENODEV; - bio->bi_blkg = blkg; - return 0; + struct request_queue *q = bio->bi_disk->queue; + struct blkcg_gq *blkg; + + rcu_read_lock(); + + if (!css || !css->parent) + blkg = q->root_blkg; + else + blkg = blkg_lookup_create(css_to_blkcg(css), q); + + __bio_associate_blkg(bio, blkg); + + rcu_read_unlock(); } +EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); +#ifdef CONFIG_MEMCG /** - * bio_disassociate_task - undo bio_associate_current() + * bio_associate_blkg_from_page - associate a bio with the page's blkg * @bio: target bio + * @page: the page to lookup the blkcg from + * + * Associate @bio with the blkg from @page's owning memcg and the respective + * request_queue. If cgroup_e_css returns %NULL, fall back to the queue's + * root_blkg. 
*/ -void bio_disassociate_task(struct bio *bio) +void bio_associate_blkg_from_page(struct bio *bio, struct page *page) { - if (bio->bi_ioc) { - put_io_context(bio->bi_ioc); - bio->bi_ioc = NULL; - } - if (bio->bi_css) { - css_put(bio->bi_css); - bio->bi_css = NULL; - } - if (bio->bi_blkg) { - blkg_put(bio->bi_blkg); - bio->bi_blkg = NULL; - } + struct cgroup_subsys_state *css; + + if (!page->mem_cgroup) + return; + + rcu_read_lock(); + + css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); + bio_associate_blkg_from_css(bio, css); + + rcu_read_unlock(); +} +#endif /* CONFIG_MEMCG */ + +/** + * bio_associate_blkg - associate a bio with a blkg + * @bio: target bio + * + * Associate @bio with the blkg found from the bio's css and request_queue. + * If one is not found, bio_lookup_blkg() creates the blkg. If a blkg is + * already associated, the css is reused and association redone as the + * request_queue may have changed. + */ +void bio_associate_blkg(struct bio *bio) +{ + struct cgroup_subsys_state *css; + + rcu_read_lock(); + + if (bio->bi_blkg) + css = &bio_blkcg(bio)->css; + else + css = blkcg_css(); + + bio_associate_blkg_from_css(bio, css); + + rcu_read_unlock(); } +EXPORT_SYMBOL_GPL(bio_associate_blkg); /** - * bio_clone_blkcg_association - clone blkcg association from src to dst bio + * bio_clone_blkg_association - clone blkg association from src to dst bio * @dst: destination bio * @src: source bio */ -void bio_clone_blkcg_association(struct bio *dst, struct bio *src) +void bio_clone_blkg_association(struct bio *dst, struct bio *src) { - if (src->bi_css) - WARN_ON(bio_associate_blkcg(dst, src->bi_css)); + rcu_read_lock(); + + if (src->bi_blkg) + __bio_associate_blkg(dst, src->bi_blkg); + + rcu_read_unlock(); } -EXPORT_SYMBOL_GPL(bio_clone_blkcg_association); +EXPORT_SYMBOL_GPL(bio_clone_blkg_association); #endif /* CONFIG_BLK_CGROUP */ static void __init biovec_init_slabs(void) |