summaryrefslogtreecommitdiffstats
path: root/block/blk-ioprio.c
blob: 332a07761bf8bb7e1c1d35c14aed0ccfde57889f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
// SPDX-License-Identifier: GPL-2.0
/*
 * Block rq-qos policy for assigning an I/O priority class to requests.
 *
 * Using an rq-qos policy for assigning I/O priority class has two advantages
 * over using the ioprio_set() system call:
 *
 * - This policy is cgroup based so it has all the advantages of cgroups.
 * - While ioprio_set() does not affect page cache writeback I/O, this rq-qos
 *   controller affects page cache writeback I/O for filesystems that support
 *   assiociating a cgroup with writeback I/O. See also
 *   Documentation/admin-guide/cgroup-v2.rst.
 */

#include <linux/blk-cgroup.h>
#include <linux/blk-mq.h>
#include <linux/blk_types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include "blk-ioprio.h"
#include "blk-rq-qos.h"

/**
 * enum prio_policy - I/O priority class policy.
 * @POLICY_NO_CHANGE: (default) do not modify the I/O priority class.
 * @POLICY_NONE_TO_RT: modify IOPRIO_CLASS_NONE into IOPRIO_CLASS_RT.
 * @POLICY_RESTRICT_TO_BE: modify IOPRIO_CLASS_NONE and IOPRIO_CLASS_RT into
 *		IOPRIO_CLASS_BE.
 * @POLICY_ALL_TO_IDLE: change the I/O priority class into IOPRIO_CLASS_IDLE.
 *
 * See also <linux/ioprio.h>.
 */
enum prio_policy {
	POLICY_NO_CHANGE	= 0,
	POLICY_NONE_TO_RT	= 1,
	POLICY_RESTRICT_TO_BE	= 2,
	POLICY_ALL_TO_IDLE	= 3,
};

static const char *policy_name[] = {
	[POLICY_NO_CHANGE]	= "no-change",
	[POLICY_NONE_TO_RT]	= "none-to-rt",
	[POLICY_RESTRICT_TO_BE]	= "restrict-to-be",
	[POLICY_ALL_TO_IDLE]	= "idle",
};

static struct blkcg_policy ioprio_policy;

/**
 * struct ioprio_blkg - Per (cgroup, request queue) data.
 * @pd: blkg_policy_data structure.
 */
struct ioprio_blkg {
	struct blkg_policy_data pd;
};

/**
 * struct ioprio_blkcg - Per cgroup data.
 * @cpd: blkcg_policy_data structure.
 * @prio_policy: One of the IOPRIO_CLASS_* values. See also <linux/ioprio.h>.
 */
struct ioprio_blkcg {
	struct blkcg_policy_data cpd;
	enum prio_policy	 prio_policy;
};

static inline struct ioprio_blkg *pd_to_ioprio(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct ioprio_blkg, pd) : NULL;
}

static struct ioprio_blkcg *blkcg_to_ioprio_blkcg(struct blkcg *blkcg)
{
	return container_of(blkcg_to_cpd(blkcg, &ioprio_policy),
			    struct ioprio_blkcg, cpd);
}

static struct ioprio_blkcg *
ioprio_blkcg_from_css(struct cgroup_subsys_state *css)
{
	return blkcg_to_ioprio_blkcg(css_to_blkcg(css));
}

static struct ioprio_blkcg *ioprio_blkcg_from_bio(struct bio *bio)
{
	struct blkg_policy_data *pd = blkg_to_pd(bio->bi_blkg, &ioprio_policy);

	if (!pd)
		return NULL;

	return blkcg_to_ioprio_blkcg(pd->blkg->blkcg);
}

static int ioprio_show_prio_policy(struct seq_file *sf, void *v)
{
	struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(seq_css(sf));

	seq_printf(sf, "%s\n", policy_name[blkcg->prio_policy]);
	return 0;
}

static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf,
				      size_t nbytes, loff_t off)
{
	struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(of_css(of));
	int ret;

	if (off != 0)
		return -EIO;
	/* kernfs_fop_write_iter() terminates 'buf' with '\0'. */
	ret = sysfs_match_string(policy_name, buf);
	if (ret < 0)
		return ret;
	blkcg->prio_policy = ret;

	return nbytes;
}

static struct blkg_policy_data *
ioprio_alloc_pd(gfp_t gfp, struct request_queue *q, struct blkcg *blkcg)
{
	struct ioprio_blkg *ioprio_blkg;

	ioprio_blkg = kzalloc(sizeof(*ioprio_blkg), gfp);
	if (!ioprio_blkg)
		return NULL;

	return &ioprio_blkg->pd;
}

static void ioprio_free_pd(struct blkg_policy_data *pd)
{
	struct ioprio_blkg *ioprio_blkg = pd_to_ioprio(pd);

	kfree(ioprio_blkg);
}

static struct blkcg_policy_data *ioprio_alloc_cpd(gfp_t gfp)
{
	struct ioprio_blkcg *blkcg;

	blkcg = kzalloc(sizeof(*blkcg), gfp);
	if (!blkcg)
		return NULL;
	blkcg->prio_policy = POLICY_NO_CHANGE;
	return &blkcg->cpd;
}

static void ioprio_free_cpd(struct blkcg_policy_data *cpd)
{
	struct ioprio_blkcg *blkcg = container_of(cpd, typeof(*blkcg), cpd);

	kfree(blkcg);
}

#define IOPRIO_ATTRS						\
	{							\
		.name		= "prio.class",			\
		.seq_show	= ioprio_show_prio_policy,	\
		.write		= ioprio_set_prio_policy,	\
	},							\
	{ } /* sentinel */

/* cgroup v2 attributes */
static struct cftype ioprio_files[] = {
	IOPRIO_ATTRS
};

/* cgroup v1 attributes */
static struct cftype ioprio_legacy_files[] = {
	IOPRIO_ATTRS
};

static struct blkcg_policy ioprio_policy = {
	.dfl_cftypes	= ioprio_files,
	.legacy_cftypes = ioprio_legacy_files,

	.cpd_alloc_fn	= ioprio_alloc_cpd,
	.cpd_free_fn	= ioprio_free_cpd,

	.pd_alloc_fn	= ioprio_alloc_pd,
	.pd_free_fn	= ioprio_free_pd,
};

struct blk_ioprio {
	struct rq_qos rqos;
};

static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq,
			       struct bio *bio)
{
	struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio);

	/*
	 * Except for IOPRIO_CLASS_NONE, higher I/O priority numbers
	 * correspond to a lower priority. Hence, the max_t() below selects
	 * the lower priority of bi_ioprio and the cgroup I/O priority class.
	 * If the cgroup policy has been set to POLICY_NO_CHANGE == 0, the
	 * bio I/O priority is not modified. If the bio I/O priority equals
	 * IOPRIO_CLASS_NONE, the cgroup I/O priority is assigned to the bio.
	 */
	bio->bi_ioprio = max_t(u16, bio->bi_ioprio,
			       IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0));
}

static void blkcg_ioprio_exit(struct rq_qos *rqos)
{
	struct blk_ioprio *blkioprio_blkg =
		container_of(rqos, typeof(*blkioprio_blkg), rqos);

	blkcg_deactivate_policy(rqos->q, &ioprio_policy);
	kfree(blkioprio_blkg);
}

static struct rq_qos_ops blkcg_ioprio_ops = {
	.track	= blkcg_ioprio_track,
	.exit	= blkcg_ioprio_exit,
};

int blk_ioprio_init(struct request_queue *q)
{
	struct blk_ioprio *blkioprio_blkg;
	struct rq_qos *rqos;
	int ret;

	blkioprio_blkg = kzalloc(sizeof(*blkioprio_blkg), GFP_KERNEL);
	if (!blkioprio_blkg)
		return -ENOMEM;

	ret = blkcg_activate_policy(q, &ioprio_policy);
	if (ret) {
		kfree(blkioprio_blkg);
		return ret;
	}

	rqos = &blkioprio_blkg->rqos;
	rqos->id = RQ_QOS_IOPRIO;
	rqos->ops = &blkcg_ioprio_ops;
	rqos->q = q;

	/*
	 * Registering the rq-qos policy after activating the blk-cgroup
	 * policy guarantees that ioprio_blkcg_from_bio(bio) != NULL in the
	 * rq-qos callbacks.
	 */
	rq_qos_add(q, rqos);

	return 0;
}

static int __init ioprio_init(void)
{
	return blkcg_policy_register(&ioprio_policy);
}

static void __exit ioprio_exit(void)
{
	blkcg_policy_unregister(&ioprio_policy);
}

module_init(ioprio_init);
module_exit(ioprio_exit);