Diffstat (limited to 'block/blk-wbt.c')
-rw-r--r--  block/blk-wbt.c | 230
1 file changed, 153 insertions(+), 77 deletions(-)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 8641ba9793c5..0c0e270a8265 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -25,12 +25,78 @@
#include <linux/backing-dev.h>
#include <linux/swap.h>
+#include "blk-stat.h"
#include "blk-wbt.h"
#include "blk-rq-qos.h"
+#include "elevator.h"
#define CREATE_TRACE_POINTS
#include <trace/events/wbt.h>
+enum wbt_flags {
+ WBT_TRACKED = 1, /* write, tracked for throttling */
+ WBT_READ = 2, /* read */
+ WBT_KSWAPD = 4, /* write, from kswapd */
+ WBT_DISCARD = 8, /* discard */
+
+ WBT_NR_BITS = 4, /* number of bits */
+};
+
+enum {
+ WBT_RWQ_BG = 0,
+ WBT_RWQ_KSWAPD,
+ WBT_RWQ_DISCARD,
+ WBT_NUM_RWQ,
+};
+
+/*
+ * If the current state is WBT_STATE_ON/OFF_DEFAULT, it can be switched to any
+ * other state; if the current state is WBT_STATE_ON/OFF_MANUAL, it can only be
+ * switched to WBT_STATE_OFF/ON_MANUAL.
+ */
+enum {
+ WBT_STATE_ON_DEFAULT = 1, /* on by default */
+ WBT_STATE_ON_MANUAL = 2, /* on manually by sysfs */
+ WBT_STATE_OFF_DEFAULT = 3, /* off by default */
+ WBT_STATE_OFF_MANUAL = 4, /* off manually by sysfs */
+};
+
+struct rq_wb {
+ /*
+ * Settings that govern how we throttle
+ */
+ unsigned int wb_background; /* background writeback */
+ unsigned int wb_normal; /* normal writeback */
+
+ short enable_state; /* WBT_STATE_* */
+
+ /*
+ * Number of consecutive periods where we don't have enough
+ * information to make a firm scale up/down decision.
+ */
+ unsigned int unknown_cnt;
+
+ u64 win_nsec; /* default window size */
+ u64 cur_win_nsec; /* current window size */
+
+ struct blk_stat_callback *cb;
+
+ u64 sync_issue;
+ void *sync_cookie;
+
+ unsigned long last_issue; /* last non-throttled issue */
+ unsigned long last_comp; /* last non-throttled comp */
+ unsigned long min_lat_nsec;
+ struct rq_qos rqos;
+ struct rq_wait rq_wait[WBT_NUM_RWQ];
+ struct rq_depth rq_depth;
+};
+
+static inline struct rq_wb *RQWB(struct rq_qos *rqos)
+{
+ return container_of(rqos, struct rq_wb, rqos);
+}
+
static inline void wbt_clear_state(struct request *rq)
{
rq->wbt_flags = 0;
@@ -77,7 +143,8 @@ enum {
static inline bool rwb_enabled(struct rq_wb *rwb)
{
- return rwb && rwb->wb_normal != 0;
+ return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
+ rwb->enable_state != WBT_STATE_OFF_MANUAL;
}
static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
@@ -96,9 +163,9 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
*/
static bool wb_recent_wait(struct rq_wb *rwb)
{
- struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;
+ struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
- return time_before(jiffies, wb->dirty_sleep + HZ);
+ return time_before(jiffies, bdi->last_bdp_sleep + HZ);
}
static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
@@ -132,22 +199,14 @@ static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
inflight = atomic_dec_return(&rqw->inflight);
/*
- * wbt got disabled with IO in flight. Wake up any potential
- * waiters, we don't have to do more than that.
- */
- if (unlikely(!rwb_enabled(rwb))) {
- rwb_wake_all(rwb);
- return;
- }
-
- /*
* For discards, our limit is always the background. For writes, if
* the device does write back caching, drop further down before we
* wake people up.
*/
if (wb_acct & WBT_DISCARD)
limit = rwb->wb_background;
- else if (rwb->wc && !wb_recent_wait(rwb))
+ else if (test_bit(QUEUE_FLAG_WC, &rwb->rqos.disk->queue->queue_flags) &&
+ !wb_recent_wait(rwb))
limit = 0;
else
limit = rwb->wb_normal;
@@ -224,6 +283,16 @@ static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
return now - issue;
}
+static inline unsigned int wbt_inflight(struct rq_wb *rwb)
+{
+ unsigned int i, ret = 0;
+
+ for (i = 0; i < WBT_NUM_RWQ; i++)
+ ret += atomic_read(&rwb->rq_wait[i].inflight);
+
+ return ret;
+}
+
enum {
LAT_OK = 1,
LAT_UNKNOWN,
@@ -233,7 +302,7 @@ enum {
static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
{
- struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+ struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
struct rq_depth *rqd = &rwb->rq_depth;
u64 thislat;
@@ -286,7 +355,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
{
- struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+ struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
struct rq_depth *rqd = &rwb->rq_depth;
trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
@@ -313,7 +382,7 @@ static void scale_up(struct rq_wb *rwb)
calc_wb_limits(rwb);
rwb->unknown_cnt = 0;
rwb_wake_all(rwb);
- rwb_trace_step(rwb, "scale up");
+ rwb_trace_step(rwb, tracepoint_string("scale up"));
}
static void scale_down(struct rq_wb *rwb, bool hard_throttle)
@@ -322,7 +391,7 @@ static void scale_down(struct rq_wb *rwb, bool hard_throttle)
return;
calc_wb_limits(rwb);
rwb->unknown_cnt = 0;
- rwb_trace_step(rwb, "scale down");
+ rwb_trace_step(rwb, tracepoint_string("scale down"));
}
static void rwb_arm_timer(struct rq_wb *rwb)
@@ -356,10 +425,12 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
unsigned int inflight = wbt_inflight(rwb);
int status;
+ if (!rwb->rqos.disk)
+ return;
+
status = latency_exceeded(rwb, cb->stat);
- trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step,
- inflight);
+ trace_wbt_timer(rwb->rqos.disk->bdi, status, rqd->scale_step, inflight);
/*
* If we exceeded the latency target, step down. If we did not,
@@ -405,7 +476,7 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
rwb_arm_timer(rwb);
}
-static void __wbt_update_limits(struct rq_wb *rwb)
+static void wbt_update_limits(struct rq_wb *rwb)
{
struct rq_depth *rqd = &rwb->rq_depth;
@@ -418,12 +489,11 @@ static void __wbt_update_limits(struct rq_wb *rwb)
rwb_wake_all(rwb);
}
-void wbt_update_limits(struct request_queue *q)
+bool wbt_disabled(struct request_queue *q)
{
struct rq_qos *rqos = wbt_rq_qos(q);
- if (!rqos)
- return;
- __wbt_update_limits(RQWB(rqos));
+
+ return !rqos || !rwb_enabled(RQWB(rqos));
}
u64 wbt_get_min_lat(struct request_queue *q)
@@ -439,9 +509,14 @@ void wbt_set_min_lat(struct request_queue *q, u64 val)
struct rq_qos *rqos = wbt_rq_qos(q);
if (!rqos)
return;
+
RQWB(rqos)->min_lat_nsec = val;
- RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
- __wbt_update_limits(RQWB(rqos));
+ if (val)
+ RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
+ else
+ RQWB(rqos)->enable_state = WBT_STATE_OFF_MANUAL;
+
+ wbt_update_limits(RQWB(rqos));
}
@@ -455,18 +530,11 @@ static bool close_io(struct rq_wb *rwb)
#define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO)
-static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
+static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
{
unsigned int limit;
- /*
- * If we got disabled, just return UINT_MAX. This ensures that
- * we'll properly inc a new IO, and dec+wakeup at the end.
- */
- if (!rwb_enabled(rwb))
- return UINT_MAX;
-
- if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
+ if ((opf & REQ_OP_MASK) == REQ_OP_DISCARD)
return rwb->wb_background;
/*
@@ -477,9 +545,9 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
* the idle limit, or go to normal if we haven't had competing
* IO for a bit.
*/
- if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
+ if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
limit = rwb->rq_depth.max_depth;
- else if ((rw & REQ_BACKGROUND) || close_io(rwb)) {
+ else if ((opf & REQ_BACKGROUND) || close_io(rwb)) {
/*
* If less than 100ms since we completed unrelated IO,
* limit us to half the depth for background writeback.
@@ -494,13 +562,13 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
struct wbt_wait_data {
struct rq_wb *rwb;
enum wbt_flags wb_acct;
- unsigned long rw;
+ blk_opf_t opf;
};
static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data)
{
struct wbt_wait_data *data = private_data;
- return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw));
+ return rq_wait_inc_below(rqw, get_limit(data->rwb, data->opf));
}
static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
@@ -514,19 +582,19 @@ static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
* the timer to kick off queuing again.
*/
static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
- unsigned long rw)
+ blk_opf_t opf)
{
struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
struct wbt_wait_data data = {
.rwb = rwb,
.wb_acct = wb_acct,
- .rw = rw,
+ .opf = opf,
};
rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
}
-static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
+static inline bool wbt_should_throttle(struct bio *bio)
{
switch (bio_op(bio)) {
case REQ_OP_WRITE:
@@ -536,7 +604,7 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) ==
(REQ_SYNC | REQ_IDLE))
return false;
- /* fallthrough */
+ fallthrough;
case REQ_OP_DISCARD:
return true;
default:
@@ -553,7 +621,7 @@ static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
if (bio_op(bio) == REQ_OP_READ) {
flags = WBT_READ;
- } else if (wbt_should_throttle(rwb, bio)) {
+ } else if (wbt_should_throttle(bio)) {
if (current_is_kswapd())
flags |= WBT_KSWAPD;
if (bio_op(bio) == REQ_OP_DISCARD)
@@ -571,7 +639,6 @@ static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
}
/*
- * Returns true if the IO request should be accounted, false if not.
* May sleep, if we have exceeded the writeback limits. Caller can pass
* in an irq held spinlock, if it holds one when calling this function.
* If we do sleep, we'll release and re-grab it.
@@ -631,29 +698,33 @@ static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
}
}
-void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
-{
- struct rq_qos *rqos = wbt_rq_qos(q);
- if (rqos)
- RQWB(rqos)->wc = write_cache_on;
-}
-
/*
* Enable wbt if defaults are configured that way
*/
-void wbt_enable_default(struct request_queue *q)
+void wbt_enable_default(struct gendisk *disk)
{
- struct rq_qos *rqos = wbt_rq_qos(q);
+ struct request_queue *q = disk->queue;
+ struct rq_qos *rqos;
+ bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ);
+
+ if (q->elevator &&
+ test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags))
+ enable = false;
+
/* Throttling already enabled? */
- if (rqos)
+ rqos = wbt_rq_qos(q);
+ if (rqos) {
+ if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
+ RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
return;
+ }
/* Queue not registered? Maybe shutting down... */
if (!blk_queue_registered(q))
return;
- if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
- wbt_init(q);
+ if (queue_is_mq(q) && enable)
+ wbt_init(disk);
}
EXPORT_SYMBOL_GPL(wbt_enable_default);
@@ -671,7 +742,7 @@ u64 wbt_default_latency_nsec(struct request_queue *q)
static int wbt_data_dir(const struct request *rq)
{
- const int op = req_op(rq);
+ const enum req_op op = req_op(rq);
if (op == REQ_OP_READ)
return READ;
@@ -684,16 +755,15 @@ static int wbt_data_dir(const struct request *rq)
static void wbt_queue_depth_changed(struct rq_qos *rqos)
{
- RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
- __wbt_update_limits(RQWB(rqos));
+ RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue);
+ wbt_update_limits(RQWB(rqos));
}
static void wbt_exit(struct rq_qos *rqos)
{
struct rq_wb *rwb = RQWB(rqos);
- struct request_queue *q = rqos->q;
- blk_stat_remove_callback(q, rwb->cb);
+ blk_stat_remove_callback(rqos->disk->queue, rwb->cb);
blk_stat_free_callback(rwb->cb);
kfree(rwb);
}
@@ -701,16 +771,16 @@ static void wbt_exit(struct rq_qos *rqos)
/*
* Disable wbt, if enabled by default.
*/
-void wbt_disable_default(struct request_queue *q)
+void wbt_disable_default(struct gendisk *disk)
{
- struct rq_qos *rqos = wbt_rq_qos(q);
+ struct rq_qos *rqos = wbt_rq_qos(disk->queue);
struct rq_wb *rwb;
if (!rqos)
return;
rwb = RQWB(rqos);
if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
blk_stat_deactivate(rwb->cb);
- rwb->wb_normal = 0;
+ rwb->enable_state = WBT_STATE_OFF_DEFAULT;
}
}
EXPORT_SYMBOL_GPL(wbt_disable_default);
@@ -803,7 +873,7 @@ static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
};
#endif
-static struct rq_qos_ops wbt_rqos_ops = {
+static const struct rq_qos_ops wbt_rqos_ops = {
.throttle = wbt_wait,
.issue = wbt_issue,
.track = wbt_track,
@@ -817,10 +887,12 @@ static struct rq_qos_ops wbt_rqos_ops = {
#endif
};
-int wbt_init(struct request_queue *q)
+int wbt_init(struct gendisk *disk)
{
+ struct request_queue *q = disk->queue;
struct rq_wb *rwb;
int i;
+ int ret;
rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
if (!rwb)
@@ -835,26 +907,30 @@ int wbt_init(struct request_queue *q)
for (i = 0; i < WBT_NUM_RWQ; i++)
rq_wait_init(&rwb->rq_wait[i]);
- rwb->rqos.id = RQ_QOS_WBT;
- rwb->rqos.ops = &wbt_rqos_ops;
- rwb->rqos.q = q;
rwb->last_comp = rwb->last_issue = jiffies;
rwb->win_nsec = RWB_WINDOW_NSEC;
rwb->enable_state = WBT_STATE_ON_DEFAULT;
- rwb->wc = 1;
rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
- __wbt_update_limits(rwb);
+ rwb->min_lat_nsec = wbt_default_latency_nsec(q);
+ rwb->rq_depth.queue_depth = blk_queue_depth(q);
+ wbt_update_limits(rwb);
/*
* Assign rwb and add the stats callback.
*/
- rq_qos_add(q, &rwb->rqos);
+ mutex_lock(&q->rq_qos_mutex);
+ ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
+ mutex_unlock(&q->rq_qos_mutex);
+ if (ret)
+ goto err_free;
+
blk_stat_add_callback(q, rwb->cb);
- rwb->min_lat_nsec = wbt_default_latency_nsec(q);
+ return 0;
- wbt_queue_depth_changed(&rwb->rqos);
- wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
+err_free:
+ blk_stat_free_callback(rwb->cb);
+ kfree(rwb);
+ return ret;
- return 0;
}
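
The hunks above replace the old "wb_normal = 0" trick with an explicit enable_state machine: wbt_init() starts at WBT_STATE_ON_DEFAULT, wbt_disable_default()/wbt_enable_default() only toggle the *_DEFAULT states, and a sysfs write via wbt_set_min_lat() pins the state to ON/OFF_MANUAL. The following standalone sketch is a simplified userspace model of those transition rules, not kernel code; struct rq_wb_model and the helper names are invented for illustration and only mirror the logic visible in this patch.

/* wbt_state_model.c -- build with: cc -o wbt_state_model wbt_state_model.c */
#include <stdbool.h>
#include <stdio.h>

enum wbt_enable_state {
	WBT_STATE_ON_DEFAULT	= 1,	/* on by default */
	WBT_STATE_ON_MANUAL	= 2,	/* on manually by sysfs */
	WBT_STATE_OFF_DEFAULT	= 3,	/* off by default */
	WBT_STATE_OFF_MANUAL	= 4,	/* off manually by sysfs */
};

struct rq_wb_model {
	enum wbt_enable_state	enable_state;
	unsigned long long	min_lat_nsec;
};

/* Mirrors rwb_enabled(): throttling is active unless the state is OFF. */
static bool model_enabled(const struct rq_wb_model *rwb)
{
	return rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
	       rwb->enable_state != WBT_STATE_OFF_MANUAL;
}

/* Mirrors wbt_disable_default(): only a default-on instance may be turned off. */
static void model_disable_default(struct rq_wb_model *rwb)
{
	if (rwb->enable_state == WBT_STATE_ON_DEFAULT)
		rwb->enable_state = WBT_STATE_OFF_DEFAULT;
}

/* Mirrors wbt_enable_default(): only a default-off instance may be re-enabled. */
static void model_enable_default(struct rq_wb_model *rwb)
{
	if (rwb->enable_state == WBT_STATE_OFF_DEFAULT)
		rwb->enable_state = WBT_STATE_ON_DEFAULT;
}

/* Mirrors wbt_set_min_lat(): a sysfs write always wins and marks the state manual. */
static void model_set_min_lat(struct rq_wb_model *rwb, unsigned long long val)
{
	rwb->min_lat_nsec = val;
	rwb->enable_state = val ? WBT_STATE_ON_MANUAL : WBT_STATE_OFF_MANUAL;
}

int main(void)
{
	/* wbt_init() starts every instance in the default-on state. */
	struct rq_wb_model rwb = { .enable_state = WBT_STATE_ON_DEFAULT };

	model_disable_default(&rwb);		/* e.g. an elevator disabling wbt */
	printf("after default-off: %d\n", model_enabled(&rwb));	/* 0 */

	model_set_min_lat(&rwb, 2000000ULL);	/* sysfs write of a nonzero latency */
	printf("after manual-on:   %d\n", model_enabled(&rwb));	/* 1 */

	model_disable_default(&rwb);		/* no effect: state is manual now */
	printf("after default-off: %d\n", model_enabled(&rwb));	/* 1 */
	return 0;
}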