1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
|
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
* Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
* Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
* Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
*/
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/slab.h>
#include <net/ax25.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
/*
 * Discard every frame still held on the connection's write, ack,
 * resequencing and fragment queues.
 */
void ax25_clear_queues(ax25_cb *ax25)
{
	/* Purged in the order listed here. */
	struct sk_buff_head *queues[] = {
		&ax25->write_queue,
		&ax25->ack_queue,
		&ax25->reseq_queue,
		&ax25->frag_queue,
	};
	unsigned int i;

	for (i = 0; i < sizeof(queues) / sizeof(queues[0]); i++)
		skb_queue_purge(queues[i]);
}
/*
 * Release the frames on the ack queue that have been acknowledged,
 * advancing V(a) by one (modulo the connection's modulus) for each frame
 * released. This replaces the boxes labelled "V(a) <- N(r)" on the SDL
 * diagram.
 */
void ax25_frames_acked(ax25_cb *ax25, unsigned short nr)
{
	/*
	 * Keep going until V(a) has caught up with nr or the ack queue
	 * has nothing left on it.
	 */
	while (ax25->va != nr && skb_peek(&ax25->ack_queue) != NULL) {
		kfree_skb(skb_dequeue(&ax25->ack_queue));
		ax25->va = (ax25->va + 1) % ax25->modulus;
	}
}
/*
 * Put all the un-ack-ed frames back on the output queue so that they are
 * picked up by ax25_kick called from the timer. This arrangement handles
 * the possibility of an empty output queue.
 */
void ax25_requeue_frames(ax25_cb *ax25)
{
	struct sk_buff *unacked;

	/*
	 * Frames are taken from the tail of the ack queue and pushed onto
	 * the head of the write queue, so they end up at the front of the
	 * write queue in their original relative order.
	 */
	for (;;) {
		unacked = skb_dequeue_tail(&ax25->ack_queue);
		if (unacked == NULL)
			break;

		skb_queue_head(&ax25->write_queue, unacked);
	}
}
/*
 * Check whether nr lies in the window running from va up to and including
 * vs, stepping modulo the connection's modulus. Returns 1 when it does and
 * 0 otherwise.
 */
int ax25_validate_nr(ax25_cb *ax25, unsigned short nr)
{
	unsigned short seq;

	/* Walk va .. vs-1; vs itself is tested after the loop. */
	for (seq = ax25->va; seq != ax25->vs; seq = (seq + 1) % ax25->modulus) {
		if (seq == nr)
			return 1;
	}

	return (nr == ax25->vs) ? 1 : 0;
}
/*
* This routine is the centralised routine for parsing the control
* information for the different frame formats.
*/
int ax25_decode(ax25_cb *ax25, struct sk_buff *skb, int *ns, int *nr, int *pf)
{
unsigned char *frame;
int frametype = AX25_ILLEGAL;
frame = skb->data;
*ns = *nr = *pf = 0;
if (ax25->modulus == AX25_MODULUS) {
if ((frame[0] & AX25_S) == 0) {
frametype = AX25_I; /* I frame - carries NR/NS/PF */
*ns = (frame[0] >> 1) & 0x07;
*nr = (frame[0] >> 5) & 0x07;
*pf = frame[0] & AX25_PF;
} else if ((frame[0] & AX25_U) == 1) { /* S frame - take out PF/NR */
frametype = frame[0] & 0x0F;
*nr = (frame[0] >> 5) & 0x07;
*pf = frame[0] & AX25_PF;
} else if ((frame[0] & AX25_U) == 3) { /* U frame - take out PF */
frametype = frame[0] & ~AX25_PF;
*pf = frame[0] & AX25_PF;
}
skb_pull(skb, 1);
} else {
if ((frame[0] & AX25_S) == 0) {
frametype = AX25_I; /* I frame - carries NR/NS/PF */
*ns = (frame[0] >> 1) & 0x7F;
*nr = (frame[1] >> 1) & 0x7F;
*pf = frame[1] & AX25_EPF;
skb_pull(skb, 2);
} else if ((frame[0] & AX
}
/*
* Buffered writeback throttling, loosely based on CoDel. We can't drop
* packets for IO scheduling, so the logic is something like this:
*
* - Monitor latencies in a defined window of time.
* - If the minimum latency in the above window exceeds some target, increment
* scaling step and scale down queue depth by a factor of 2x. The monitoring
* window is then shrunk to 100 / sqrt(scaling step + 1).
* - For any window where we don't have solid data on what the latencies
* look like, retain status quo.
* - If latencies look good, decrement scaling step.
* - If we're only doing writes, allow the scaling step to go negative. This
* will temporarily boost write performance, snapping back to a stable
* scaling step of 0 if reads show up or the heavy writers finish. Unlike
* positive scaling steps where we shrink the monitoring window, a negative
* scaling step retains the default step==0 window size.
*
* Copyright (C) 2016 Jens Axboe
*
*/
#include <linux/kernel.h>
#include <linux/blk_types.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/swap.h>
#include "blk-wbt.h"
#include "blk-rq-qos.h"
#define CREATE_TRACE_POINTS
#include <trace/events/wbt.h>
/* Reset the wbt flags stored on @rq to zero. */
static inline void wbt_clear_state(struct request *rq)
{
rq->wbt_flags = 0;
}
/* Return the wbt flags currently stored on @rq. */
static inline enum wbt_flags wbt_flags(struct request *rq)
{
return rq->wbt_flags;
}
/* True when @rq has the WBT_TRACKED flag set. */
static inline bool wbt_is_tracked(struct request *rq)
{
	return (rq->wbt_flags & WBT_TRACKED) != 0;
}
/* True when @rq has the WBT_READ flag set. */
static inline bool wbt_is_read(struct request *rq)
{
	return (rq->wbt_flags & WBT_READ) != 0;
}
/*
 * Tunables for the writeback throttling code in this file.
 */
enum {
/*
* Default setting, we'll scale up (to 75% of QD max) or down (min 1)
* from here depending on device stats
*/
RWB_DEF_DEPTH = 16,
/*
* 100msec monitoring window, expressed in nanoseconds
*/
RWB_WINDOW_NSEC = 100 * 1000 * 1000ULL,
/*
* Disregard stats if we don't meet this minimum number of write
* samples (checked in stat_sample_valid())
*/
RWB_MIN_WRITE_SAMPLES = 3,
/*
* If we have this number of consecutive windows with not enough
* information to scale up or down, scale up.
*/
RWB_UNKNOWN_BUMP = 5,
};
static inline bool rwb_enabled(struct rq_wb *rwb)
{
return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
rwb->wb_normal != 0;
}
/*
 * Record the current jiffies value in *@var, but only while throttling is
 * enabled. The store is skipped when *@var already holds that value.
 */
static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
{
	unsigned long now;

	if (!rwb_enabled(rwb))
		return;

	now = jiffies;
	if (*var != now)
		*var = now;
}
/*
 * A task that was rate throttled in balance_dirty_pages() within roughly
 * the last second is taken as a sign that a higher cleaning rate is
 * wanted. Returns true while the current time is still before
 * dirty_sleep + HZ.
 */
static bool wb_recent_wait(struct rq_wb *rwb)
{
	struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;
	unsigned long deadline = wb->dirty_sleep + HZ;

	return time_before(jiffies, deadline);
}
/*
 * Pick the wait queue that matches the accounting flags: WBT_KSWAPD takes
 * precedence over WBT_DISCARD, and everything else uses the background
 * queue.
 */
static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
					  enum wbt_flags wb_acct)
{
	int idx = WBT_RWQ_BG;

	if (wb_acct & WBT_KSWAPD)
		idx = WBT_RWQ_KSWAPD;
	else if (wb_acct & WBT_DISCARD)
		idx = WBT_RWQ_DISCARD;

	return &rwb->rq_wait[idx];
}
static void rwb_wake_all(struct rq_wb *rwb)
{
int i;
for (i = 0; i < WBT_NUM_RWQ; i++) {
struct rq_wait *rqw = &rwb->rq_wait[i];
if (wq_has_sleeper(&rqw->wait))
wake_up_all(&rqw->wait);
}
}
/*
 * Account for one finished request on @rqw and decide whether the tasks
 * sleeping on that queue should be woken.
 */
static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
			 enum wbt_flags wb_acct)
{
	int inflight, limit;
	int headroom;

	inflight = atomic_dec_return(&rqw->inflight);

	/*
	 * If wbt was switched off while IO was still in flight, waking up
	 * any potential waiters is all that is left to do.
	 */
	if (unlikely(!rwb_enabled(rwb))) {
		rwb_wake_all(rwb);
		return;
	}

	/*
	 * Start from the normal limit. Discards always use the background
	 * limit instead. For writes, if the device does write back caching
	 * and nobody was recently throttled, drop all the way to zero
	 * before waking people up.
	 */
	limit = rwb->wb_normal;
	if (wb_acct & WBT_DISCARD)
		limit = rwb->wb_background;
	else if (rwb->wc && !wb_recent_wait(rwb))
		limit = 0;

	/* Still at or above the limit with IO pending: no wakeup. */
	if (inflight != 0 && inflight >= limit)
		return;

	if (!wq_has_sleeper(&rqw->wait))
		return;

	/*
	 * Wake when the queue has fully drained, or when the room below
	 * the limit is at least half of the background depth.
	 */
	headroom = limit - inflight;
	if (inflight == 0 || headroom >= rwb->wb_background / 2)
		wake_up_all(&rqw->wait);
}
/*
 * Completion accounting for a request described by its wbt flags.
 * Requests without WBT_TRACKED are ignored.
 */
static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
{
	struct rq_wb *rwb;

	if ((wb_acct & WBT_TRACKED) == 0)
		return;

	rwb = RQWB(rqos);
	wbt_rqw_done(rwb, get_rq_wait(rwb, wb_acct), wb_acct);
}
/*
 * Called on completion of a request. Note that it's also called when
 * a request is merged, when the request gets freed.
 *
 * Tracked requests go through the inflight accounting. Untracked ones
 * only clear the sync cookie when it points at this request and, for
 * reads, refresh the last completion timestamp. The wbt state on the
 * request is cleared in both cases.
 */
static void wbt_done(struct rq_qos *rqos, struct request *rq)
{
	struct rq_wb *rwb = RQWB(rqos);

	if (wbt_is_tracked(rq)) {
		/* A tracked request is not expected to be the sync cookie. */
		WARN_ON_ONCE(rq == rwb->sync_cookie);
		__wbt_done(rqos, wbt_flags(rq));
	} else {
		if (rwb->sync_cookie == rq) {
			rwb->sync_issue = 0;
			rwb->sync_cookie = NULL;
		}

		if (wbt_is_read(rq))
			wb_timestamp(rwb, &rwb->last_comp);
	}

	wbt_clear_state(rq);
}
static inline bool stat_sample_valid(struct blk_rq_stat *stat)
{
/*
* We need at least one read sample, and a minimum of
* RWB_MIN_WRITE_SAMPLES. We require some write samples to know
* that it's writes impacting us, and not just some sole read on
* a device that is in a lower power state.
*/
return (stat[READ].nr_samples >= 1 &&
stat[WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES);
}
static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
{
u64 now, issue = READ_ONCE(rwb->sync_issue);
if (!issue || !rwb->
|