Diffstat (limited to 'block/blk-wbt.c')
-rw-r--r--	block/blk-wbt.c	230
1 file changed, 153 insertions(+), 77 deletions(-)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 8641ba9793c5..0c0e270a8265 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -25,12 +25,78 @@
 #include <linux/backing-dev.h>
 #include <linux/swap.h>
 
+#include "blk-stat.h"
 #include "blk-wbt.h"
 #include "blk-rq-qos.h"
+#include "elevator.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/wbt.h>
 
+enum wbt_flags {
+        WBT_TRACKED             = 1,    /* write, tracked for throttling */
+        WBT_READ                = 2,    /* read */
+        WBT_KSWAPD              = 4,    /* write, from kswapd */
+        WBT_DISCARD             = 8,    /* discard */
+
+        WBT_NR_BITS             = 4,    /* number of bits */
+};
+
+enum {
+        WBT_RWQ_BG              = 0,
+        WBT_RWQ_KSWAPD,
+        WBT_RWQ_DISCARD,
+        WBT_NUM_RWQ,
+};
+
+/*
+ * If current state is WBT_STATE_ON/OFF_DEFAULT, it can be covered to any other
+ * state, if current state is WBT_STATE_ON/OFF_MANUAL, it can only be covered
+ * to WBT_STATE_OFF/ON_MANUAL.
+ */
+enum {
+        WBT_STATE_ON_DEFAULT    = 1,    /* on by default */
+        WBT_STATE_ON_MANUAL     = 2,    /* on manually by sysfs */
+        WBT_STATE_OFF_DEFAULT   = 3,    /* off by default */
+        WBT_STATE_OFF_MANUAL    = 4,    /* off manually by sysfs */
+};
+
+struct rq_wb {
+        /*
+         * Settings that govern how we throttle
+         */
+        unsigned int wb_background;     /* background writeback */
+        unsigned int wb_normal;         /* normal writeback */
+
+        short enable_state;             /* WBT_STATE_* */
+
+        /*
+         * Number of consecutive periods where we don't have enough
+         * information to make a firm scale up/down decision.
+         */
+        unsigned int unknown_cnt;
+
+        u64 win_nsec;                   /* default window size */
+        u64 cur_win_nsec;               /* current window size */
+
+        struct blk_stat_callback *cb;
+
+        u64 sync_issue;
+        void *sync_cookie;
+
+        unsigned long last_issue;       /* last non-throttled issue */
+        unsigned long last_comp;        /* last non-throttled comp */
+        unsigned long min_lat_nsec;
+        struct rq_qos rqos;
+        struct rq_wait rq_wait[WBT_NUM_RWQ];
+        struct rq_depth rq_depth;
+};
+
+static inline struct rq_wb *RQWB(struct rq_qos *rqos)
+{
+        return container_of(rqos, struct rq_wb, rqos);
+}
+
 static inline void wbt_clear_state(struct request *rq)
 {
         rq->wbt_flags = 0;
@@ -77,7 +143,8 @@ enum {
 
 static inline bool rwb_enabled(struct rq_wb *rwb)
 {
-        return rwb && rwb->wb_normal != 0;
+        return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
+                      rwb->enable_state != WBT_STATE_OFF_MANUAL;
 }
 
 static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
@@ -96,9 +163,9 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
  */
 static bool wb_recent_wait(struct rq_wb *rwb)
 {
-        struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;
+        struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
 
-        return time_before(jiffies, wb->dirty_sleep + HZ);
+        return time_before(jiffies, bdi->last_bdp_sleep + HZ);
 }
 
 static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
@@ -132,22 +199,14 @@ static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
         inflight = atomic_dec_return(&rqw->inflight);
 
         /*
-         * wbt got disabled with IO in flight. Wake up any potential
-         * waiters, we don't have to do more than that.
-         */
-        if (unlikely(!rwb_enabled(rwb))) {
-                rwb_wake_all(rwb);
-                return;
-        }
-
-        /*
          * For discards, our limit is always the background. For writes, if
          * the device does write back caching, drop further down before we
          * wake people up.
          */
         if (wb_acct & WBT_DISCARD)
                 limit = rwb->wb_background;
-        else if (rwb->wc && !wb_recent_wait(rwb))
+        else if (test_bit(QUEUE_FLAG_WC, &rwb->rqos.disk->queue->queue_flags) &&
+                 !wb_recent_wait(rwb))
                 limit = 0;
         else
                 limit = rwb->wb_normal;
@@ -224,6 +283,16 @@ static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
         return now - issue;
 }
 
+static inline unsigned int wbt_inflight(struct rq_wb *rwb)
+{
+        unsigned int i, ret = 0;
+
+        for (i = 0; i < WBT_NUM_RWQ; i++)
+                ret += atomic_read(&rwb->rq_wait[i].inflight);
+
+        return ret;
+}
+
 enum {
         LAT_OK = 1,
         LAT_UNKNOWN,
@@ -233,7 +302,7 @@ enum {
 
 static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 {
-        struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+        struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
         struct rq_depth *rqd = &rwb->rq_depth;
         u64 thislat;
@@ -286,7 +355,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 
 static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
 {
-        struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+        struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
         struct rq_depth *rqd = &rwb->rq_depth;
 
         trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
@@ -313,7 +382,7 @@ static void scale_up(struct rq_wb *rwb)
         calc_wb_limits(rwb);
         rwb->unknown_cnt = 0;
         rwb_wake_all(rwb);
-        rwb_trace_step(rwb, "scale up");
+        rwb_trace_step(rwb, tracepoint_string("scale up"));
 }
 
 static void scale_down(struct rq_wb *rwb, bool hard_throttle)
@@ -322,7 +391,7 @@
                 return;
         calc_wb_limits(rwb);
         rwb->unknown_cnt = 0;
-        rwb_trace_step(rwb, "scale down");
+        rwb_trace_step(rwb, tracepoint_string("scale down"));
 }
 
 static void rwb_arm_timer(struct rq_wb *rwb)
@@ -356,10 +425,12 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
         unsigned int inflight = wbt_inflight(rwb);
         int status;
 
+        if (!rwb->rqos.disk)
+                return;
+
         status = latency_exceeded(rwb, cb->stat);
 
-        trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step,
-                        inflight);
+        trace_wbt_timer(rwb->rqos.disk->bdi, status, rqd->scale_step, inflight);
 
         /*
          * If we exceeded the latency target, step down. If we did not,
@@ -405,7 +476,7 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
                 rwb_arm_timer(rwb);
 }
 
-static void __wbt_update_limits(struct rq_wb *rwb)
+static void wbt_update_limits(struct rq_wb *rwb)
 {
         struct rq_depth *rqd = &rwb->rq_depth;
@@ -418,12 +489,11 @@ static void __wbt_update_limits(struct rq_wb *rwb)
         rwb_wake_all(rwb);
 }
 
-void wbt_update_limits(struct request_queue *q)
+bool wbt_disabled(struct request_queue *q)
 {
         struct rq_qos *rqos = wbt_rq_qos(q);
-        if (!rqos)
-                return;
-        __wbt_update_limits(RQWB(rqos));
+
+        return !rqos || !rwb_enabled(RQWB(rqos));
 }
 
 u64 wbt_get_min_lat(struct request_queue *q)
@@ -439,9 +509,14 @@ void wbt_set_min_lat(struct request_queue *q, u64 val)
         struct rq_qos *rqos = wbt_rq_qos(q);
         if (!rqos)
                 return;
+
         RQWB(rqos)->min_lat_nsec = val;
-        RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
-        __wbt_update_limits(RQWB(rqos));
+        if (val)
+                RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
+        else
+                RQWB(rqos)->enable_state = WBT_STATE_OFF_MANUAL;
+
+        wbt_update_limits(RQWB(rqos));
 }
@@ -455,18 +530,11 @@ static bool close_io(struct rq_wb *rwb)
 
 #define REQ_HIPRIO      (REQ_SYNC | REQ_META | REQ_PRIO)
 
-static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
+static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
 {
         unsigned int limit;
 
-        /*
-         * If we got disabled, just return UINT_MAX. This ensures that
-         * we'll properly inc a new IO, and dec+wakeup at the end.
-         */
-        if (!rwb_enabled(rwb))
-                return UINT_MAX;
-
-        if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
+        if ((opf & REQ_OP_MASK) == REQ_OP_DISCARD)
                 return rwb->wb_background;
 
         /*
@@ -477,9 +545,9 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
          * the idle limit, or go to normal if we haven't had competing
          * IO for a bit.
          */
-        if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
+        if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
                 limit = rwb->rq_depth.max_depth;
-        else if ((rw & REQ_BACKGROUND) || close_io(rwb)) {
+        else if ((opf & REQ_BACKGROUND) || close_io(rwb)) {
                 /*
                  * If less than 100ms since we completed unrelated IO,
                  * limit us to half the depth for background writeback.
@@ -494,13 +562,13 @@
 struct wbt_wait_data {
         struct rq_wb *rwb;
         enum wbt_flags wb_acct;
-        unsigned long rw;
+        blk_opf_t opf;
 };
 
 static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data)
 {
         struct wbt_wait_data *data = private_data;
-        return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw));
+        return rq_wait_inc_below(rqw, get_limit(data->rwb, data->opf));
 }
 
 static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
@@ -514,19 +582,19 @@ static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
  * the timer to kick off queuing again.
  */
 static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
-                       unsigned long rw)
+                       blk_opf_t opf)
 {
         struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
         struct wbt_wait_data data = {
                 .rwb = rwb,
                 .wb_acct = wb_acct,
-                .rw = rw,
+                .opf = opf,
         };
 
         rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
 }
 
-static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
+static inline bool wbt_should_throttle(struct bio *bio)
 {
         switch (bio_op(bio)) {
         case REQ_OP_WRITE:
@@ -536,7 +604,7 @@
                 if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) ==
                     (REQ_SYNC | REQ_IDLE))
                         return false;
-                /* fallthrough */
+                fallthrough;
         case REQ_OP_DISCARD:
                 return true;
         default:
@@ -553,7 +621,7 @@ static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
 
         if (bio_op(bio) == REQ_OP_READ) {
                 flags = WBT_READ;
-        } else if (wbt_should_throttle(rwb, bio)) {
+        } else if (wbt_should_throttle(bio)) {
                 if (current_is_kswapd())
                         flags |= WBT_KSWAPD;
                 if (bio_op(bio) == REQ_OP_DISCARD)
@@ -571,7 +639,6 @@ static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
 }
 
 /*
- * Returns true if the IO request should be accounted, false if not.
  * May sleep, if we have exceeded the writeback limits. Caller can pass
  * in an irq held spinlock, if it holds one when calling this function.
  * If we do sleep, we'll release and re-grab it.
@@ -631,29 +698,33 @@ static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
         }
 }
 
-void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
-{
-        struct rq_qos *rqos = wbt_rq_qos(q);
-        if (rqos)
-                RQWB(rqos)->wc = write_cache_on;
-}
-
 /*
  * Enable wbt if defaults are configured that way
  */
-void wbt_enable_default(struct request_queue *q)
+void wbt_enable_default(struct gendisk *disk)
 {
-        struct rq_qos *rqos = wbt_rq_qos(q);
+        struct request_queue *q = disk->queue;
+        struct rq_qos *rqos;
+        bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ);
+
+        if (q->elevator &&
+            test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags))
+                enable = false;
+
         /* Throttling already enabled? */
-        if (rqos)
+        rqos = wbt_rq_qos(q);
+        if (rqos) {
+                if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
+                        RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
                 return;
+        }
 
         /* Queue not registered? Maybe shutting down... */
         if (!blk_queue_registered(q))
                 return;
 
-        if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
-                wbt_init(q);
+        if (queue_is_mq(q) && enable)
+                wbt_init(disk);
 }
 EXPORT_SYMBOL_GPL(wbt_enable_default);
@@ -671,7 +742,7 @@ u64 wbt_default_latency_nsec(struct request_queue *q)
 
 static int wbt_data_dir(const struct request *rq)
 {
-        const int op = req_op(rq);
+        const enum req_op op = req_op(rq);
 
         if (op == REQ_OP_READ)
                 return READ;
@@ -684,16 +755,15 @@
 
 static void wbt_queue_depth_changed(struct rq_qos *rqos)
 {
-        RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
-        __wbt_update_limits(RQWB(rqos));
+        RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue);
+        wbt_update_limits(RQWB(rqos));
 }
 
 static void wbt_exit(struct rq_qos *rqos)
 {
         struct rq_wb *rwb = RQWB(rqos);
-        struct request_queue *q = rqos->q;
 
-        blk_stat_remove_callback(q, rwb->cb);
+        blk_stat_remove_callback(rqos->disk->queue, rwb->cb);
         blk_stat_free_callback(rwb->cb);
         kfree(rwb);
 }
@@ -701,16 +771,16 @@ static void wbt_exit(struct rq_qos *rqos)
 /*
  * Disable wbt, if enabled by default.
  */
-void wbt_disable_default(struct request_queue *q)
+void wbt_disable_default(struct gendisk *disk)
 {
-        struct rq_qos *rqos = wbt_rq_qos(q);
+        struct rq_qos *rqos = wbt_rq_qos(disk->queue);
         struct rq_wb *rwb;
         if (!rqos)
                 return;
         rwb = RQWB(rqos);
         if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
                 blk_stat_deactivate(rwb->cb);
-                rwb->wb_normal = 0;
+                rwb->enable_state = WBT_STATE_OFF_DEFAULT;
         }
 }
 EXPORT_SYMBOL_GPL(wbt_disable_default);
@@ -803,7 +873,7 @@ static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
 };
 #endif
 
-static struct rq_qos_ops wbt_rqos_ops = {
+static const struct rq_qos_ops wbt_rqos_ops = {
         .throttle = wbt_wait,
         .issue = wbt_issue,
         .track = wbt_track,
@@ -817,10 +887,12 @@ static struct rq_qos_ops wbt_rqos_ops = {
 #endif
 };
 
-int wbt_init(struct request_queue *q)
+int wbt_init(struct gendisk *disk)
 {
+        struct request_queue *q = disk->queue;
         struct rq_wb *rwb;
         int i;
+        int ret;
 
         rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
         if (!rwb)
@@ -835,26 +907,30 @@ int wbt_init(struct request_queue *q)
         for (i = 0; i < WBT_NUM_RWQ; i++)
                 rq_wait_init(&rwb->rq_wait[i]);
 
-        rwb->rqos.id = RQ_QOS_WBT;
-        rwb->rqos.ops = &wbt_rqos_ops;
-        rwb->rqos.q = q;
         rwb->last_comp = rwb->last_issue = jiffies;
         rwb->win_nsec = RWB_WINDOW_NSEC;
         rwb->enable_state = WBT_STATE_ON_DEFAULT;
-        rwb->wc = 1;
         rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
-        __wbt_update_limits(rwb);
+        rwb->min_lat_nsec = wbt_default_latency_nsec(q);
+        rwb->rq_depth.queue_depth = blk_queue_depth(q);
+        wbt_update_limits(rwb);
 
         /*
          * Assign rwb and add the stats callback.
         */
         /* (as in the original) */
-        rq_qos_add(q, &rwb->rqos);
+        mutex_lock(&q->rq_qos_mutex);
+        ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
+        mutex_unlock(&q->rq_qos_mutex);
+        if (ret)
+                goto err_free;
+
         blk_stat_add_callback(q, rwb->cb);
 
-        rwb->min_lat_nsec = wbt_default_latency_nsec(q);
+        return 0;
 
-        wbt_queue_depth_changed(&rwb->rqos);
-        wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
+err_free:
+        blk_stat_free_callback(rwb->cb);
+        kfree(rwb);
+        return ret;
 
-        return 0;
 }
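
A note on the new RQWB() helper at the top of this diff: it works because struct rq_qos is embedded by value inside struct rq_wb, so container_of() can recover the wbt-private structure from the generic handle that every rq_qos callback receives. Below is a minimal userspace sketch of the same embed-and-recover idiom; the struct fields are stand-ins, and only the container_of() mechanics match the kernel code.

/*
 * Userspace sketch of the embed-and-recover idiom behind RQWB().
 * The struct fields are placeholders, not the kernel definitions.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct rq_qos {
        int id;                         /* placeholder for the generic fields */
};

struct rq_wb {
        unsigned int wb_background;
        unsigned int wb_normal;
        struct rq_qos rqos;             /* generic handle embedded by value */
};

static inline struct rq_wb *RQWB(struct rq_qos *rqos)
{
        return container_of(rqos, struct rq_wb, rqos);
}

int main(void)
{
        struct rq_wb rwb = { .wb_background = 4, .wb_normal = 8 };
        struct rq_qos *handle = &rwb.rqos;      /* what a callback receives */

        /* Recover the outer rq_wb from the embedded member: */
        printf("wb_normal = %u\n", RQWB(handle)->wb_normal);
        return 0;
}

This is also why the reworked wbt_init() passes &rwb->rqos to rq_qos_add() rather than the rq_wb itself: the generic rq_qos layer only ever sees the embedded member.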
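The WBT_STATE_* enum and the rewritten rwb_enabled() encode a small ownership policy: a state set by default can be overridden by either the kernel or sysfs, while a state set manually via sysfs sticks until the next sysfs write. The sketch below is a userspace model of those transitions; the helper names (sysfs_set_lat, enable_default, disable_default) are hypothetical stand-ins for wbt_set_min_lat(), wbt_enable_default() and wbt_disable_default().

/*
 * Userspace model of the WBT_STATE_* transition rules.
 * Helper names are illustrative, not the kernel API.
 */
#include <stdio.h>

enum {
        WBT_STATE_ON_DEFAULT    = 1,
        WBT_STATE_ON_MANUAL     = 2,
        WBT_STATE_OFF_DEFAULT   = 3,
        WBT_STATE_OFF_MANUAL    = 4,
};

/* mirrors the new rwb_enabled(): only the two OFF states disable wbt */
static int state_enabled(short state)
{
        return state != WBT_STATE_OFF_DEFAULT && state != WBT_STATE_OFF_MANUAL;
}

/* sysfs write: nonzero latency -> ON_MANUAL, zero -> OFF_MANUAL */
static short sysfs_set_lat(short state, unsigned long long nsec)
{
        (void)state;                    /* a manual write always wins */
        return nsec ? WBT_STATE_ON_MANUAL : WBT_STATE_OFF_MANUAL;
}

/* default-path enable: only lifts a *default* off state */
static short enable_default(short state)
{
        return state == WBT_STATE_OFF_DEFAULT ? WBT_STATE_ON_DEFAULT : state;
}

/* default-path disable: only drops a *default* on state */
static short disable_default(short state)
{
        return state == WBT_STATE_ON_DEFAULT ? WBT_STATE_OFF_DEFAULT : state;
}

int main(void)
{
        short s = WBT_STATE_ON_DEFAULT;

        s = disable_default(s);                 /* e.g. an elevator opting out */
        printf("state %d, enabled %d\n", s, state_enabled(s)); /* 3, 0 */
        s = sysfs_set_lat(s, 75000000ULL);      /* user re-enables via sysfs */
        printf("state %d, enabled %d\n", s, state_enabled(s)); /* 2, 1 */
        s = disable_default(s);                 /* no effect on a manual state */
        printf("state %d, enabled %d\n", s, state_enabled(s)); /* 2, 1 */
        return 0;
}

The behavioral fix over the old code shows in wbt_disable_default(): instead of zeroing wb_normal, it now records WBT_STATE_OFF_DEFAULT, so a later wbt_enable_default() (for example, after switching away from an elevator that sets ELEVATOR_FLAG_DISABLE_WBT) can flip throttling back on without tearing anything down.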
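Finally, get_limit()'s decision ladder is easier to read as a pure function. The sketch below mirrors the post-patch ordering over simplified boolean inputs; the struct and parameter names are illustrative stand-ins for the opf flag tests in the real code, and the actual halving of the background depth happens in calc_wb_limits(), which is outside this diff.

/*
 * Pure-function sketch of get_limit()'s ladder after this patch.
 * The booleans stand in for (opf & REQ_OP_MASK) == REQ_OP_DISCARD,
 * REQ_HIPRIO / current_is_kswapd(), and REQ_BACKGROUND / close_io().
 */
#include <stdio.h>

struct wb_depths {
        unsigned int wb_background;     /* background writeback limit */
        unsigned int wb_normal;         /* normal writeback limit */
        unsigned int max_depth;         /* scaled device queue depth */
};

static unsigned int get_limit(const struct wb_depths *d, int is_discard,
                              int is_hiprio_or_kswapd, int is_background)
{
        if (is_discard)
                return d->wb_background;
        if (is_hiprio_or_kswapd)
                return d->max_depth;    /* don't starve higher-priority IO */
        if (is_background)
                return d->wb_background; /* half of normal, per calc_wb_limits() */
        return d->wb_normal;
}

int main(void)
{
        struct wb_depths d = { .wb_background = 4, .wb_normal = 8, .max_depth = 16 };

        printf("discard:    %u\n", get_limit(&d, 1, 0, 0));     /* 4 */
        printf("hiprio:     %u\n", get_limit(&d, 0, 1, 0));     /* 16 */
        printf("background: %u\n", get_limit(&d, 0, 0, 1));     /* 4 */
        printf("normal:     %u\n", get_limit(&d, 0, 0, 0));     /* 8 */
        return 0;
}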