aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/act_connmark.c9
-rw-r--r--net/sched/act_csum.c2
-rw-r--r--net/sched/act_ife.c7
-rw-r--r--net/sched/act_mirred.c28
-rw-r--r--net/sched/act_pedit.c12
-rw-r--r--net/sched/act_skbedit.c2
-rw-r--r--net/sched/act_tunnel_key.c4
-rw-r--r--net/sched/cls_api.c106
-rw-r--r--net/sched/cls_basic.c11
-rw-r--r--net/sched/cls_bpf.c11
-rw-r--r--net/sched/cls_flow.c8
-rw-r--r--net/sched/cls_flower.c15
-rw-r--r--net/sched/cls_fw.c11
-rw-r--r--net/sched/cls_matchall.c12
-rw-r--r--net/sched/cls_route.c15
-rw-r--r--net/sched/cls_rsvp.h17
-rw-r--r--net/sched/cls_tcindex.c102
-rw-r--r--net/sched/cls_u32.c11
-rw-r--r--net/sched/em_ipset.c2
-rw-r--r--net/sched/em_meta.c4
-rw-r--r--net/sched/ematch.c5
-rw-r--r--net/sched/sch_api.c49
-rw-r--r--net/sched/sch_atm.c8
-rw-r--r--net/sched/sch_cake.c65
-rw-r--r--net/sched/sch_cbs.c12
-rw-r--r--net/sched/sch_choke.c3
-rw-r--r--net/sched/sch_dsmark.c6
-rw-r--r--net/sched/sch_etf.c7
-rw-r--r--net/sched/sch_fq.c24
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_generic.c17
-rw-r--r--net/sched/sch_hhf.c8
-rw-r--r--net/sched/sch_mq.c4
-rw-r--r--net/sched/sch_mqprio.c7
-rw-r--r--net/sched/sch_multiq.c2
-rw-r--r--net/sched/sch_netem.c13
-rw-r--r--net/sched/sch_prio.c12
-rw-r--r--net/sched/sch_sfb.c20
-rw-r--r--net/sched/sch_sfq.c23
-rw-r--r--net/sched/sch_skbprio.c3
-rw-r--r--net/sched/sch_taprio.c42
-rw-r--r--net/sched/sch_teql.c2
42 files changed, 512 insertions, 211 deletions
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2b43cacf82af..1a8f2f85ea1a 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -43,17 +43,20 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a,
tcf_lastuse_update(&ca->tcf_tm);
bstats_update(&ca->tcf_bstats, skb);
- if (skb->protocol == htons(ETH_P_IP)) {
+ switch (skb_protocol(skb, true)) {
+ case htons(ETH_P_IP):
if (skb->len < sizeof(struct iphdr))
goto out;
proto = NFPROTO_IPV4;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ break;
+ case htons(ETH_P_IPV6):
if (skb->len < sizeof(struct ipv6hdr))
goto out;
proto = NFPROTO_IPV6;
- } else {
+ break;
+ default:
goto out;
}
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d3cfad88dc3a..428b1ae00123 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -587,7 +587,7 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
goto drop;
update_flags = params->update_flags;
- protocol = tc_skb_protocol(skb);
+ protocol = skb_protocol(skb, false);
again:
switch (protocol) {
case cpu_to_be16(ETH_P_IP):
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 3a31e241c647..a0cfb4793c93 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -536,6 +536,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
ife = to_ife(*a);
+ if (ret == ACT_P_CREATED)
+ INIT_LIST_HEAD(&ife->metalist);
+
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;
@@ -565,10 +568,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
p->eth_type = ife_type;
}
-
- if (ret == ACT_P_CREATED)
- INIT_LIST_HEAD(&ife->metalist);
-
if (tb[TCA_IFE_METALST]) {
err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
tb[TCA_IFE_METALST], NULL,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index bd3178a95cb9..a82400f219f3 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -215,8 +215,10 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
bool use_reinsert;
bool want_ingress;
bool is_redirect;
+ bool expects_nh;
int m_eaction;
int mac_len;
+ bool at_nh;
tcf_lastuse_update(&m->tcf_tm);
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
@@ -249,19 +251,19 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
goto out;
}
- /* If action's target direction differs than filter's direction,
- * and devices expect a mac header on xmit, then mac push/pull is
- * needed.
- */
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
- if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) {
- if (!skb_at_tc_ingress(skb)) {
- /* caught at egress, act ingress: pull mac */
- mac_len = skb_network_header(skb) - skb_mac_header(skb);
+
+ expects_nh = want_ingress || !m_mac_header_xmit;
+ at_nh = skb->data == skb_network_header(skb);
+ if (at_nh != expects_nh) {
+ mac_len = skb_at_tc_ingress(skb) ? skb->mac_len :
+ skb_network_header(skb) - skb_mac_header(skb);
+ if (expects_nh) {
+ /* target device/action expect data at nh */
skb_pull_rcsum(skb2, mac_len);
} else {
- /* caught at ingress, act egress: push mac */
- skb_push_rcsum(skb2, skb->mac_len);
+ /* target device/action expect data at mac */
+ skb_push_rcsum(skb2, mac_len);
}
}
@@ -270,10 +272,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
/* mirror is always swallowed */
if (is_redirect) {
- skb2->tc_redirected = 1;
- skb2->tc_from_ingress = skb2->tc_at_ingress;
- if (skb2->tc_from_ingress)
- skb2->tstamp = 0;
+ skb_set_redirected(skb2, skb2->tc_at_ingress);
+
/* let's the caller reinsert the packet, if possible */
if (use_reinsert) {
res->ingress = want_ingress;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index cdfaa79382a2..b5bc631b96b7 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -43,7 +43,7 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla,
int err = -EINVAL;
int rem;
- if (!nla || !n)
+ if (!nla)
return NULL;
keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL);
@@ -170,6 +170,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
}
parm = nla_data(pattr);
+ if (!parm->nkeys) {
+ NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
+ return -EINVAL;
+ }
ksize = parm->nkeys * sizeof(struct tc_pedit_key);
if (nla_len(pattr) < sizeof(*parm) + ksize) {
NL_SET_ERR_MSG_ATTR(extack, pattr, "Length of TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute is invalid");
@@ -183,12 +187,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- if (!parm->nkeys) {
- tcf_idr_cleanup(tn, index);
- NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
- ret = -EINVAL;
- goto out_free;
- }
ret = tcf_idr_create(tn, index, est, a,
&act_pedit_ops, bind, false);
if (ret) {
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6a8d3337c577..f98b2791ecec 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -41,7 +41,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
if (params->flags & SKBEDIT_F_INHERITDSFIELD) {
int wlen = skb_network_offset(skb);
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
wlen += sizeof(struct iphdr);
if (!pskb_may_pull(skb, wlen))
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 2f83a79f76aa..d55669e14741 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -135,6 +135,10 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
+ if (opts_len > IP_TUNNEL_OPTS_MAX) {
+ NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size");
+ return -EINVAL;
+ }
if (dst) {
dst_len -= opt_len;
dst += opt_len;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ffc6aa6d0f64..2b94d6f02dbb 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -21,6 +21,7 @@
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
+#include <linux/jhash.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
@@ -44,6 +45,62 @@ static LIST_HEAD(tcf_proto_base);
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);
+static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
+{
+ return jhash_3words(tp->chain->index, tp->prio,
+ (__force __u32)tp->protocol, 0);
+}
+
+static void tcf_proto_signal_destroying(struct tcf_chain *chain,
+ struct tcf_proto *tp)
+{
+ struct tcf_block *block = chain->block;
+
+ mutex_lock(&block->proto_destroy_lock);
+ hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node,
+ destroy_obj_hashfn(tp));
+ mutex_unlock(&block->proto_destroy_lock);
+}
+
+static bool tcf_proto_cmp(const struct tcf_proto *tp1,
+ const struct tcf_proto *tp2)
+{
+ return tp1->chain->index == tp2->chain->index &&
+ tp1->prio == tp2->prio &&
+ tp1->protocol == tp2->protocol;
+}
+
+static bool tcf_proto_exists_destroying(struct tcf_chain *chain,
+ struct tcf_proto *tp)
+{
+ u32 hash = destroy_obj_hashfn(tp);
+ struct tcf_proto *iter;
+ bool found = false;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter,
+ destroy_ht_node, hash) {
+ if (tcf_proto_cmp(tp, iter)) {
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return found;
+}
+
+static void
+tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
+{
+ struct tcf_block *block = chain->block;
+
+ mutex_lock(&block->proto_destroy_lock);
+ if (hash_hashed(&tp->destroy_ht_node))
+ hash_del_rcu(&tp->destroy_ht_node);
+ mutex_unlock(&block->proto_destroy_lock);
+}
+
/* Find classifier type by string name */
static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
@@ -231,9 +288,11 @@ static void tcf_proto_get(struct tcf_proto *tp)
static void tcf_chain_put(struct tcf_chain *chain);
static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ bool sig_destroy, struct netlink_ext_ack *extack)
{
tp->ops->destroy(tp, rtnl_held, extack);
+ if (sig_destroy)
+ tcf_proto_signal_destroyed(tp->chain, tp);
tcf_chain_put(tp->chain);
module_put(tp->ops->owner);
kfree_rcu(tp, rcu);
@@ -243,7 +302,7 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
if (refcount_dec_and_test(&tp->refcnt))
- tcf_proto_destroy(tp, rtnl_held, extack);
+ tcf_proto_destroy(tp, rtnl_held, true, extack);
}
static int walker_check_empty(struct tcf_proto *tp, void *fh,
@@ -367,6 +426,7 @@ static bool tcf_chain_detach(struct tcf_chain *chain)
static void tcf_block_destroy(struct tcf_block *block)
{
mutex_destroy(&block->lock);
+ mutex_destroy(&block->proto_destroy_lock);
kfree_rcu(block, rcu);
}
@@ -542,6 +602,12 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_dereference(chain->filter_chain, chain);
+ while (tp) {
+ tp_next = rcu_dereference_protected(tp->next, 1);
+ tcf_proto_signal_destroying(chain, tp);
+ tp = tp_next;
+ }
+ tp = tcf_chain_dereference(chain->filter_chain, chain);
RCU_INIT_POINTER(chain->filter_chain, NULL);
tcf_chain0_head_change(chain, NULL);
chain->flushing = true;
@@ -979,6 +1045,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
return ERR_PTR(-ENOMEM);
}
mutex_init(&block->lock);
+ mutex_init(&block->proto_destroy_lock);
INIT_LIST_HEAD(&block->chain_list);
INIT_LIST_HEAD(&block->cb_list);
INIT_LIST_HEAD(&block->owner_list);
@@ -1660,7 +1727,7 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
reclassify:
#endif
for (; tp; tp = rcu_dereference_bh(tp->next)) {
- __be16 protocol = tc_skb_protocol(skb);
+ __be16 protocol = skb_protocol(skb, false);
int err;
if (tp->protocol != protocol &&
@@ -1758,6 +1825,12 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
mutex_lock(&chain->filter_chain_lock);
+ if (tcf_proto_exists_destroying(chain, tp_new)) {
+ mutex_unlock(&chain->filter_chain_lock);
+ tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
+ return ERR_PTR(-EAGAIN);
+ }
+
tp = tcf_chain_tp_find(chain, &chain_info,
protocol, prio, false);
if (!tp)
@@ -1765,10 +1838,10 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
mutex_unlock(&chain->filter_chain_lock);
if (tp) {
- tcf_proto_destroy(tp_new, rtnl_held, NULL);
+ tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
tp_new = tp;
} else if (err) {
- tcf_proto_destroy(tp_new, rtnl_held, NULL);
+ tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
tp_new = ERR_PTR(err);
}
@@ -1806,6 +1879,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
return;
}
+ tcf_proto_signal_destroying(chain, tp);
next = tcf_chain_dereference(chain_info.next, chain);
if (tp == chain->filter_chain)
tcf_chain0_head_change(chain, next);
@@ -2075,6 +2149,7 @@ replay:
err = PTR_ERR(block);
goto errout;
}
+ block->classid = parent;
chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
if (chain_index > TC_ACT_EXT_VAL_MASK) {
@@ -2125,9 +2200,8 @@ replay:
&chain_info));
mutex_unlock(&chain->filter_chain_lock);
- tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
- protocol, prio, chain, rtnl_held,
- extack);
+ tp_new = tcf_proto_create(name, protocol, prio, chain,
+ rtnl_held, extack);
if (IS_ERR(tp_new)) {
err = PTR_ERR(tp_new);
goto errout_tp;
@@ -2325,6 +2399,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
err = -EINVAL;
goto errout_locked;
} else if (t->tcm_handle == 0) {
+ tcf_proto_signal_destroying(chain, tp);
tcf_chain_tp_remove(chain, &chain_info, tp);
mutex_unlock(&chain->filter_chain_lock);
@@ -2617,12 +2692,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
parent = tcm->tcm_parent;
- if (!parent) {
+ if (!parent)
q = dev->qdisc;
- parent = q->handle;
- } else {
+ else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
- }
if (!q)
goto out;
cops = q->ops->cl_ops;
@@ -2638,6 +2711,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
block = cops->tcf_block(q, cl, NULL);
if (!block)
goto out;
+ parent = block->classid;
if (tcf_block_shared(block))
q = NULL;
}
@@ -2783,13 +2857,19 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
struct netlink_ext_ack *extack)
{
const struct tcf_proto_ops *ops;
+ char name[IFNAMSIZ];
void *tmplt_priv;
/* If kind is not set, user did not specify template. */
if (!tca[TCA_KIND])
return 0;
- ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack);
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+ NL_SET_ERR_MSG(extack, "Specified TC chain template name too long");
+ return -EINVAL;
+ }
+
+ ops = tcf_proto_lookup_ops(name, true, extack);
if (IS_ERR(ops))
return PTR_ERR(ops);
if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 4aafbe3d435c..f256a7c69093 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -263,12 +263,17 @@ skip:
}
}
-static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
+static void basic_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct basic_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 691f71830134..64d8a219bf3c 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -625,12 +625,17 @@ nla_put_failure:
return -1;
}
-static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl)
+static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl,
+ void *q, unsigned long base)
{
struct cls_bpf_prog *prog = fh;
- if (prog && prog->res.classid == classid)
- prog->res.class = cl;
+ if (prog && prog->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &prog->res, base);
+ else
+ __tcf_unbind_filter(q, &prog->res);
+ }
}
static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg,
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 80ae7b9fa90a..ab53a93b2f2b 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -80,7 +80,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
if (dst)
return ntohl(dst);
- return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
+ return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true);
}
static u32 flow_get_proto(const struct sk_buff *skb,
@@ -104,7 +104,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb,
if (flow->ports.ports)
return ntohs(flow->ports.dst);
- return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
+ return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true);
}
static u32 flow_get_iif(const struct sk_buff *skb)
@@ -151,7 +151,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
static u32 flow_get_nfct_src(const struct sk_buff *skb,
const struct flow_keys *flow)
{
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, src.u3.ip));
case htons(ETH_P_IPV6):
@@ -164,7 +164,7 @@ fallback:
static u32 flow_get_nfct_dst(const struct sk_buff *skb,
const struct flow_keys *flow)
{
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, dst.u3.ip));
case htons(ETH_P_IPV6):
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index eedd5786c084..5e112f9e6078 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -282,13 +282,14 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct fl_flow_key skb_mkey;
list_for_each_entry_rcu(mask, &head->masks, list) {
+ flow_dissector_init_keys(&skb_key.control, &skb_key.basic);
fl_clear_masked_range(&skb_key, mask);
skb_key.indev_ifindex = skb->skb_iif;
/* skb_flow_dissect() does not set n_proto in case an unknown
* protocol, so do it rather here.
*/
- skb_key.basic.n_proto = skb->protocol;
+ skb_key.basic.n_proto = skb_protocol(skb, false);
skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
@@ -704,6 +705,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED },
+ [TCA_FLOWER_FLAGS] = { .type = NLA_U32 },
};
static const struct nla_policy
@@ -2402,12 +2404,17 @@ nla_put_failure:
return -EMSGSIZE;
}
-static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
+static void fl_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct cls_fl_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static struct tcf_proto_ops cls_fl_ops __read_mostly = {
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 4dab833f66cb..b6c36d60ffae 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -432,12 +432,17 @@ nla_put_failure:
return -1;
}
-static void fw_bind_class(void *fh, u32 classid, unsigned long cl)
+static void fw_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct fw_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static struct tcf_proto_ops cls_fw_ops __read_mostly = {
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 38c0a9f0f296..2a72a061c41b 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -157,6 +157,7 @@ static void *mall_get(struct tcf_proto *tp, u32 handle)
static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
[TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC },
[TCA_MATCHALL_CLASSID] = { .type = NLA_U32 },
+ [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
};
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
@@ -391,12 +392,17 @@ nla_put_failure:
return -1;
}
-static void mall_bind_class(void *fh, u32 classid, unsigned long cl)
+static void mall_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct cls_mall_head *head = fh;
- if (head && head->res.classid == classid)
- head->res.class = cl;
+ if (head && head->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &head->res, base);
+ else
+ __tcf_unbind_filter(q, &head->res);
+ }
}
static struct tcf_proto_ops cls_mall_ops __read_mostly = {
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 2d9e0b4484ea..5efa3e7ace15 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -534,8 +534,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
fp = &b->ht[h];
for (pfp = rtnl_dereference(*fp); pfp;
fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
- if (pfp == f) {
- *fp = f->next;
+ if (pfp == fold) {
+ rcu_assign_pointer(*fp, fold->next);
break;
}
}
@@ -641,12 +641,17 @@ nla_put_failure:
return -1;
}
-static void route4_bind_class(void *fh, u32 classid, unsigned long cl)
+static void route4_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct route4_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static struct tcf_proto_ops cls_route4_ops __read_mostly = {
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 2f3c03b25d5d..d36949d9382c 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -463,10 +463,8 @@ static u32 gen_tunnel(struct rsvp_head *data)
static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
[TCA_RSVP_CLASSID] = { .type = NLA_U32 },
- [TCA_RSVP_DST] = { .type = NLA_BINARY,
- .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_SRC] = { .type = NLA_BINARY,
- .len = RSVP_DST_LEN * sizeof(u32) },
+ [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) },
+ [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) },
[TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
};
@@ -738,12 +736,17 @@ nla_put_failure:
return -1;
}
-static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl)
+static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct rsvp_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static struct tcf_proto_ops RSVP_OPS __read_mostly = {
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index e573e5a5c794..61e95029c18f 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -11,6 +11,7 @@
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/slab.h>
+#include <linux/refcount.h>
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
@@ -26,9 +27,12 @@
#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */
+struct tcindex_data;
+
struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result res;
+ struct tcindex_data *p;
struct rcu_work rwork;
};
@@ -49,6 +53,7 @@ struct tcindex_data {
u32 hash; /* hash table size; 0 if undefined */
u32 alloc_hash; /* allocated size */
u32 fall_through; /* 0: only classify if explicit match */
+ refcount_t refcnt; /* a temporary refcnt for perfect hash */
struct rcu_work rwork;
};
@@ -57,6 +62,20 @@ static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
return tcf_exts_has_actions(&r->exts) || r->res.classid;
}
+static void tcindex_data_get(struct tcindex_data *p)
+{
+ refcount_inc(&p->refcnt);
+}
+
+static void tcindex_data_put(struct tcindex_data *p)
+{
+ if (refcount_dec_and_test(&p->refcnt)) {
+ kfree(p->perfect);
+ kfree(p->h);
+ kfree(p);
+ }
+}
+
static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
u16 key)
{
@@ -132,6 +151,7 @@ static int tcindex_init(struct tcf_proto *tp)
p->mask = 0xffff;
p->hash = DEFAULT_HASH_SIZE;
p->fall_through = 1;
+ refcount_set(&p->refcnt, 1); /* Paired with tcindex_destroy_work() */
rcu_assign_pointer(tp->root, p);
return 0;
@@ -141,6 +161,7 @@ static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
{
tcf_exts_destroy(&r->exts);
tcf_exts_put_net(&r->exts);
+ tcindex_data_put(r->p);
}
static void tcindex_destroy_rexts_work(struct work_struct *work)
@@ -212,6 +233,8 @@ found:
else
__tcindex_destroy_fexts(f);
} else {
+ tcindex_data_get(p);
+
if (tcf_exts_get_net(&r->exts))
tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work);
else
@@ -228,9 +251,7 @@ static void tcindex_destroy_work(struct work_struct *work)
struct tcindex_data,
rwork);
- kfree(p->perfect);
- kfree(p->h);
- kfree(p);
+ tcindex_data_put(p);
}
static inline int
@@ -248,9 +269,11 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
};
static int tcindex_filter_result_init(struct tcindex_filter_result *r,
+ struct tcindex_data *p,
struct net *net)
{
memset(r, 0, sizeof(*r));
+ r->p = p;
return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
TCA_TCINDEX_POLICE);
}
@@ -261,8 +284,10 @@ static void tcindex_partial_destroy_work(struct work_struct *work)
struct tcindex_data,
rwork);
+ rtnl_lock();
kfree(p->perfect);
kfree(p);
+ rtnl_unlock();
}
static void tcindex_free_perfect_hash(struct tcindex_data *cp)
@@ -288,6 +313,7 @@ static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
goto errout;
+ cp->perfect[i].p = cp;
}
return 0;
@@ -332,33 +358,45 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
cp->alloc_hash = p->alloc_hash;
cp->fall_through = p->fall_through;
cp->tp = tp;
+ refcount_set(&cp->refcnt, 1); /* Paired with tcindex_destroy_work() */
+
+ if (tb[TCA_TCINDEX_HASH])
+ cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
+
+ if (tb[TCA_TCINDEX_MASK])
+ cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
+
+ if (tb[TCA_TCINDEX_SHIFT])
+ cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
+
+ if (!cp->hash) {
+ /* Hash not specified, use perfect hash if the upper limit
+ * of the hashing index is below the threshold.
+ */
+ if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
+ cp->hash = (cp->mask >> cp->shift) + 1;
+ else
+ cp->hash = DEFAULT_HASH_SIZE;
+ }
if (p->perfect) {
int i;
if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout;
- for (i = 0; i < cp->hash; i++)
+ cp->alloc_hash = cp->hash;
+ for (i = 0; i < min(cp->hash, p->hash); i++)
cp->perfect[i].res = p->perfect[i].res;
balloc = 1;
}
cp->h = p->h;
- err = tcindex_filter_result_init(&new_filter_result, net);
+ err = tcindex_filter_result_init(&new_filter_result, cp, net);
if (err < 0)
- goto errout1;
+ goto errout_alloc;
if (old_r)
cr = r->res;
- if (tb[TCA_TCINDEX_HASH])
- cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
-
- if (tb[TCA_TCINDEX_MASK])
- cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
-
- if (tb[TCA_TCINDEX_SHIFT])
- cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
-
err = -EBUSY;
/* Hash already allocated, make sure that we still meet the
@@ -376,16 +414,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (tb[TCA_TCINDEX_FALL_THROUGH])
cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
- if (!cp->hash) {
- /* Hash not specified, use perfect hash if the upper limit
- * of the hashing index is below the threshold.
- */
- if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
- cp->hash = (cp->mask >> cp->shift) + 1;
- else
- cp->hash = DEFAULT_HASH_SIZE;
- }
-
if (!cp->perfect && !cp->h)
cp->alloc_hash = cp->hash;
@@ -431,7 +459,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
goto errout_alloc;
f->key = handle;
f->next = NULL;
- err = tcindex_filter_result_init(&f->result, net);
+ err = tcindex_filter_result_init(&f->result, cp, net);
if (err < 0) {
kfree(f);
goto errout_alloc;
@@ -444,7 +472,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
}
if (old_r && old_r != r) {
- err = tcindex_filter_result_init(old_r, net);
+ err = tcindex_filter_result_init(old_r, cp, net);
if (err < 0) {
kfree(f);
goto errout_alloc;
@@ -484,7 +512,6 @@ errout_alloc:
tcindex_free_perfect_hash(cp);
else if (balloc == 2)
kfree(cp->h);
-errout1:
tcf_exts_destroy(&new_filter_result.exts);
errout:
kfree(cp);
@@ -569,6 +596,14 @@ static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
for (i = 0; i < p->hash; i++) {
struct tcindex_filter_result *r = p->perfect + i;
+ /* tcf_queue_work() does not guarantee the ordering we
+ * want, so we have to take this refcnt temporarily to
+ * ensure 'p' is freed after all tcindex_filter_result
+ * here. Imperfect hash does not need this, because it
+ * uses linked lists rather than an array.
+ */
+ tcindex_data_get(p);
+
tcf_unbind_filter(tp, &r->res);
if (tcf_exts_get_net(&r->exts))
tcf_queue_work(&r->rwork,
@@ -654,12 +689,17 @@ nla_put_failure:
return -1;
}
-static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl)
+static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl,
+ void *q, unsigned long base)
{
struct tcindex_filter_result *r = fh;
- if (r && r->res.classid == classid)
- r->res.class = cl;
+ if (r && r->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &r->res, base);
+ else
+ __tcf_unbind_filter(q, &r->res);
+ }
}
static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index c7727de5e073..0443ac3ecf1e 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1271,12 +1271,17 @@ static int u32_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
return 0;
}
-static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
+static void u32_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct tc_u_knode *n = fh;
- if (n && n->res.classid == classid)
- n->res.class = cl;
+ if (n && n->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &n->res, base);
+ else
+ __tcf_unbind_filter(q, &n->res);
+ }
}
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index df00566d327d..c95cf86fb431 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
};
int ret, network_offset;
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
state.pf = NFPROTO_IPV4;
if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 82bd14e7ac93..8de4968177bd 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -195,7 +195,7 @@ META_COLLECTOR(int_priority)
META_COLLECTOR(int_protocol)
{
/* Let userspace take care of the byte ordering */
- dst->value = tc_skb_protocol(skb);
+ dst->value = skb_protocol(skb, false);
}
META_COLLECTOR(int_pkttype)
@@ -446,7 +446,7 @@ META_COLLECTOR(int_sk_wmem_queued)
*err = -1;
return;
}
- dst->value = sk->sk_wmem_queued;
+ dst->value = READ_ONCE(sk->sk_wmem_queued);
}
META_COLLECTOR(int_sk_fwd_alloc)
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 8f2ad706784d..dd3b8c11a2e0 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -238,6 +238,9 @@ static int tcf_em_validate(struct tcf_proto *tp,
goto errout;
if (em->ops->change) {
+ err = -EINVAL;
+ if (em_hdr->flags & TCF_EM_SIMPLE)
+ goto errout;
err = em->ops->change(net, data, data_len, em);
if (err < 0)
goto errout;
@@ -263,12 +266,12 @@ static int tcf_em_validate(struct tcf_proto *tp,
}
em->data = (unsigned long) v;
}
+ em->datalen = data_len;
}
}
em->matchid = em_hdr->matchid;
em->flags = em_hdr->flags;
- em->datalen = data_len;
em->net = net;
err = 0;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1047825d9f48..3eefd60e3a58 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1248,7 +1248,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
rcu_assign_pointer(sch->stab, stab);
}
if (tca[TCA_RATE]) {
- seqcount_t *running;
+ net_seqlock_t *running;
err = -EOPNOTSUPP;
if (sch->flags & TCQ_F_MQROOT) {
@@ -1891,8 +1891,9 @@ static int tclass_del_notify(struct net *net,
struct tcf_bind_args {
struct tcf_walker w;
- u32 classid;
+ unsigned long base;
unsigned long cl;
+ u32 classid;
};
static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
@@ -1903,28 +1904,30 @@ static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
struct Qdisc *q = tcf_block_q(tp->chain->block);
sch_tree_lock(q);
- tp->ops->bind_class(n, a->classid, a->cl);
+ tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
sch_tree_unlock(q);
}
return 0;
}
-static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
- unsigned long new_cl)
+struct tc_bind_class_args {
+ struct qdisc_walker w;
+ unsigned long new_cl;
+ u32 portid;
+ u32 clid;
+};
+
+static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
+ struct qdisc_walker *w)
{
+ struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
const struct Qdisc_class_ops *cops = q->ops->cl_ops;
struct tcf_block *block;
struct tcf_chain *chain;
- unsigned long cl;
- cl = cops->find(q, portid);
- if (!cl)
- return;
- if (!cops->tcf_block)
- return;
block = cops->tcf_block(q, cl, NULL);
if (!block)
- return;
+ return 0;
for (chain = tcf_get_next_chain(block, NULL);
chain;
chain = tcf_get_next_chain(block, chain)) {
@@ -1935,11 +1938,29 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
struct tcf_bind_args arg = {};
arg.w.fn = tcf_node_bind;
- arg.classid = clid;
- arg.cl = new_cl;
+ arg.classid = a->clid;
+ arg.base = cl;
+ arg.cl = a->new_cl;
tp->ops->walk(tp, &arg.w, true);
}
}
+
+ return 0;
+}
+
+static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
+ unsigned long new_cl)
+{
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+ struct tc_bind_class_args args = {};
+
+ if (!cops->tcf_block)
+ return;
+ args.portid = portid;
+ args.clid = clid;
+ args.new_cl = new_cl;
+ args.w.fn = tc_bind_class_walker;
+ q->ops->cl_ops->walk(q, &args.w);
}
#else
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index f4f9b8cdbffb..6385995dc700 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -553,16 +553,16 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
if (!p->link.q)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
+ p->link.vcc = NULL;
+ p->link.sock = NULL;
+ p->link.common.classid = sch->handle;
+ p->link.ref = 1;
err = tcf_block_get(&p->link.block, &p->link.filter_list, sch,
extack);
if (err)
return err;
- p->link.vcc = NULL;
- p->link.sock = NULL;
- p->link.common.classid = sch->handle;
- p->link.ref = 1;
tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch);
return 0;
}
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 53a80bc6b13a..896c0562cb42 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -592,7 +592,7 @@ static void cake_update_flowkeys(struct flow_keys *keys,
struct nf_conntrack_tuple tuple = {};
bool rev = !skb->_nfct;
- if (tc_skb_protocol(skb) != htons(ETH_P_IP))
+ if (skb_protocol(skb, true) != htons(ETH_P_IP))
return;
if (!nf_ct_get_tuple_skb(&tuple, skb))
@@ -1515,32 +1515,51 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
return idx + (tin << 16);
}
-static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash)
{
- int wlen = skb_network_offset(skb);
+ const int offset = skb_network_offset(skb);
+ u16 *buf, buf_;
u8 dscp;
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
- wlen += sizeof(struct iphdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
+ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
+ if (unlikely(!buf))
return 0;
- dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
- if (wash && dscp)
+ /* ToS is in the second byte of iphdr */
+ dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2;
+
+ if (wash && dscp) {
+ const int wlen = offset + sizeof(struct iphdr);
+
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+ }
+
return dscp;
case htons(ETH_P_IPV6):
- wlen += sizeof(struct ipv6hdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
+ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
+ if (unlikely(!buf))
return 0;
- dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
- if (wash && dscp)
+ /* Traffic class is in the first and second bytes of ipv6hdr */
+ dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2;
+
+ if (wash && dscp) {
+ const int wlen = offset + sizeof(struct ipv6hdr);
+
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+ }
+
return dscp;
case htons(ETH_P_ARP):
@@ -1557,14 +1576,17 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
{
struct cake_sched_data *q = qdisc_priv(sch);
u32 tin, mark;
+ bool wash;
u8 dscp;
/* Tin selection: Default to diffserv-based selection, allow overriding
- * using firewall marks or skb->priority.
+ * using firewall marks or skb->priority. Call DSCP parsing early if
+ * wash is enabled, otherwise defer to below to skip unneeded parsing.
*/
- dscp = cake_handle_diffserv(skb,
- q->rate_flags & CAKE_FLAG_WASH);
mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
+ wash = !!(q->rate_flags & CAKE_FLAG_WASH);
+ if (wash)
+ dscp = cake_handle_diffserv(skb, wash);
if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
tin = 0;
@@ -1578,6 +1600,8 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
else {
+ if (!wash)
+ dscp = cake_handle_diffserv(skb, wash);
tin = q->tin_index[dscp];
if (unlikely(tin >= q->tin_cnt))
@@ -1769,7 +1793,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
q->avg_window_begin));
u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC;
- do_div(b, window_interval);
+ b = div64_u64(b, window_interval);
q->avg_peak_bandwidth =
cake_ewma(q->avg_peak_bandwidth, b,
b > q->avg_peak_bandwidth ? 2 : 8);
@@ -2184,6 +2208,7 @@ static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = {
[TCA_CAKE_MPU] = { .type = NLA_U32 },
[TCA_CAKE_INGRESS] = { .type = NLA_U32 },
[TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 },
+ [TCA_CAKE_SPLIT_GSO] = { .type = NLA_U32 },
[TCA_CAKE_FWMARK] = { .type = NLA_U32 },
};
@@ -2678,7 +2703,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
qdisc_watchdog_init(&q->watchdog, sch);
if (opt) {
- int err = cake_change(sch, opt, extack);
+ err = cake_change(sch, opt, extack);
if (err)
return err;
@@ -2995,7 +3020,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
PUT_STAT_S32(BLUE_TIMER_US,
ktime_to_us(
ktime_sub(now,
- flow->cvars.blue_timer)));
+ flow->cvars.blue_timer)));
}
if (flow->cvars.dropping) {
PUT_STAT_S32(DROP_NEXT_US,
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 284ab2dcf47f..35a860764dfa 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -181,6 +181,11 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
s64 credits;
int len;
+ /* The previous packet is still being sent */
+ if (now < q->last) {
+ qdisc_watchdog_schedule_ns(&q->watchdog, q->last);
+ return NULL;
+ }
if (q->credits < 0) {
credits = timediff_to_credits(now - q->last, q->idleslope);
@@ -212,7 +217,12 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
credits += q->credits;
q->credits = max_t(s64, credits, q->locredit);
- q->last = now;
+ /* Estimate of the transmission of the last byte of the packet in ns */
+ if (unlikely(atomic64_read(&q->port_rate) == 0))
+ q->last = now;
+ else
+ q->last = now + div64_s64(len * NSEC_PER_SEC,
+ atomic64_read(&q->port_rate));
return skb;
}
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index dba70377bbd9..4021f726b58f 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -323,7 +323,8 @@ static void choke_reset(struct Qdisc *sch)
sch->q.qlen = 0;
sch->qstats.backlog = 0;
- memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
+ if (q->tab)
+ memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
q->head = q->tail = 0;
red_restart(&q->vars);
}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 05605b30bef3..2b88710994d7 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (p->set_tc_index) {
int wlen = skb_network_offset(skb);
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
wlen += sizeof(struct iphdr);
if (!pskb_may_pull(skb, wlen) ||
@@ -303,7 +303,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
index = skb->tc_index & (p->indices - 1);
pr_debug("index %d->%d\n", skb->tc_index, index);
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask,
p->mv[index].value);
@@ -320,7 +320,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
*/
if (p->mv[index].mask != 0xff || p->mv[index].value)
pr_warn("%s: unsupported protocol %d\n",
- __func__, ntohs(tc_skb_protocol(skb)));
+ __func__, ntohs(skb_protocol(skb, true)));
break;
}
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index b244881d2b9a..e7e20a05fc13 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -77,7 +77,7 @@ static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
struct sock *sk = nskb->sk;
ktime_t now;
- if (!sk)
+ if (!sk || !sk_fullsock(sk))
return false;
if (!sock_flag(sk, SOCK_TXTIME))
@@ -131,8 +131,9 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
struct sock_exterr_skb *serr;
struct sk_buff *clone;
ktime_t txtime = skb->tstamp;
+ struct sock *sk = skb->sk;
- if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
+ if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors))
return;
clone = skb_clone(skb, GFP_ATOMIC);
@@ -148,7 +149,7 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
serr->ee.ee_info = txtime; /* low part of tstamp */
- if (sock_queue_err_skb(skb->sk, clone))
+ if (sock_queue_err_skb(sk, clone))
kfree_skb(clone);
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 98dd87ce1510..f757ea90aba6 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -301,6 +301,9 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
f->socket_hash != sk->sk_hash)) {
f->credit = q->initial_quantum;
f->socket_hash = sk->sk_hash;
+ if (q->rate_enable)
+ smp_store_release(&sk->sk_pacing_status,
+ SK_PACING_FQ);
if (fq_flow_is_throttled(f))
fq_flow_unset_throttled(q, f);
f->time_next_packet = 0ULL;
@@ -322,8 +325,12 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
fq_flow_set_detached(f);
f->sk = sk;
- if (skb->sk == sk)
+ if (skb->sk == sk) {
f->socket_hash = sk->sk_hash;
+ if (q->rate_enable)
+ smp_store_release(&sk->sk_pacing_status,
+ SK_PACING_FQ);
+ }
f->credit = q->initial_quantum;
rb_link_node(&f->fq_node, parent, p);
@@ -428,17 +435,9 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
f->qlen++;
qdisc_qstats_backlog_inc(sch, skb);
if (fq_flow_is_detached(f)) {
- struct sock *sk = skb->sk;
-
fq_flow_add_tail(&q->new_flows, f);
if (time_after(jiffies, f->age + q->flow_refill_delay))
f->credit = max_t(u32, f->credit, q->quantum);
- if (sk && q->rate_enable) {
- if (unlikely(smp_load_acquire(&sk->sk_pacing_status) !=
- SK_PACING_FQ))
- smp_store_release(&sk->sk_pacing_status,
- SK_PACING_FQ);
- }
q->inactive_flows--;
}
@@ -746,6 +745,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
[TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
+ [TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 },
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
};
@@ -788,10 +788,12 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_QUANTUM]) {
u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
- if (quantum > 0)
+ if (quantum > 0 && quantum <= (1 << 20)) {
q->quantum = quantum;
- else
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
err = -EINVAL;
+ }
}
if (tb[TCA_FQ_INITIAL_QUANTUM])
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index d59fbcc745d1..23dabff624cb 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -425,7 +425,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
- q->drop_batch_size = min(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
+ q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 17bd8f539bc7..30b5c7cff008 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -469,6 +469,7 @@ void __netdev_watchdog_up(struct net_device *dev)
dev_hold(dev);
}
}
+EXPORT_SYMBOL_GPL(__netdev_watchdog_up);
static void dev_watchdog_up(struct net_device *dev)
{
@@ -557,7 +558,11 @@ struct Qdisc noop_qdisc = {
.ops = &noop_qdisc_ops,
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.dev_queue = &noop_netdev_queue,
+#ifdef CONFIG_PREEMPT_RT_BASE
+ .running = __SEQLOCK_UNLOCKED(noop_qdisc.running),
+#else
.running = SEQCNT_ZERO(noop_qdisc.running),
+#endif
.busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
.gso_skb = {
.next = (struct sk_buff *)&noop_qdisc.gso_skb,
@@ -657,7 +662,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
if (likely(skb)) {
qdisc_update_stats_at_dequeue(qdisc, skb);
} else {
- qdisc->empty = true;
+ WRITE_ONCE(qdisc->empty, true);
}
return skb;
@@ -862,9 +867,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
lockdep_set_class(&sch->busylock,
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+#ifdef CONFIG_PREEMPT_RT_BASE
+ seqlock_init(&sch->running);
+ lockdep_set_class(&sch->running.seqcount,
+ dev->qdisc_running_key ?: &qdisc_running_key);
+ lockdep_set_class(&sch->running.lock,
+ dev->qdisc_running_key ?: &qdisc_running_key);
+#else
seqcount_init(&sch->running);
lockdep_set_class(&sch->running,
dev->qdisc_running_key ?: &qdisc_running_key);
+#endif
sch->ops = ops;
sch->flags = ops->static_flags;
@@ -1218,7 +1231,7 @@ void dev_deactivate_many(struct list_head *head)
/* Wait for outstanding qdisc_run calls. */
list_for_each_entry(dev, head, close_list) {
while (some_qdisc_is_busy(dev))
- yield();
+ msleep(1);
/* The new qdisc is assigned at this point so we can safely
* unwind stale skb lists and qdisc statistics
*/
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 23cd1c873a2c..be35f03b657b 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -5,11 +5,11 @@
* Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com>
*/
-#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
+#include <linux/siphash.h>
#include <net/pkt_sched.h>
#include <net/sock.h>
@@ -126,7 +126,7 @@ struct wdrr_bucket {
struct hhf_sched_data {
struct wdrr_bucket buckets[WDRR_BUCKET_CNT];
- u32 perturbation; /* hash perturbation */
+ siphash_key_t perturbation; /* hash perturbation */
u32 quantum; /* psched_mtu(qdisc_dev(sch)); */
u32 drop_overlimit; /* number of times max qdisc packet
* limit was hit
@@ -264,7 +264,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
}
/* Get hashed flow-id of the skb. */
- hash = skb_get_hash_perturb(skb, q->perturbation);
+ hash = skb_get_hash_perturb(skb, &q->perturbation);
/* Check if this packet belongs to an already established HH flow. */
flow_pos = hash & HHF_BIT_MASK;
@@ -582,7 +582,7 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt,
sch->limit = 1000;
q->quantum = psched_mtu(qdisc_dev(sch));
- q->perturbation = prandom_u32();
+ get_random_bytes(&q->perturbation, sizeof(q->perturbation));
INIT_LIST_HEAD(&q->new_buckets);
INIT_LIST_HEAD(&q->old_buckets);
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 0d578333e967..e79f1afe0cfd 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -153,6 +153,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
__gnet_stats_copy_queue(&sch->qstats,
qdisc->cpu_qstats,
&qdisc->qstats, qlen);
+ sch->q.qlen += qlen;
} else {
sch->q.qlen += qdisc->q.qlen;
sch->bstats.bytes += qdisc->bstats.bytes;
@@ -245,7 +246,8 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
+ if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats,
+ &sch->bstats) < 0 ||
qdisc_qstats_copy(d, sch) < 0)
return -1;
return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 46980b8d66c5..8766ab5b8788 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -411,6 +411,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
__gnet_stats_copy_queue(&sch->qstats,
qdisc->cpu_qstats,
&qdisc->qstats, qlen);
+ sch->q.qlen += qlen;
} else {
sch->q.qlen += qdisc->q.qlen;
sch->bstats.bytes += qdisc->bstats.bytes;
@@ -433,7 +434,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.offset[tc] = dev->tc_to_txq[tc].offset;
}
- if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt))
+ if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
goto nla_put_failure;
if ((priv->flags & TC_MQPRIO_F_MODE) &&
@@ -557,8 +558,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
- d, NULL, &sch->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
+ sch->cpu_bstats, &sch->bstats) < 0 ||
qdisc_qstats_copy(d, sch) < 0)
return -1;
}
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index e1087746f6a2..5cdf3b6abae6 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -330,7 +330,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
cl_q = q->queues[cl - 1];
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
- d, NULL, &cl_q->bstats) < 0 ||
+ d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
qdisc_qstats_copy(d, cl_q) < 0)
return -1;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index f5cb35e550f8..42e557d48e4e 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -476,7 +476,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
* skb will be queued.
*/
if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
- struct Qdisc *rootq = qdisc_root(sch);
+ struct Qdisc *rootq = qdisc_root_bh(sch);
u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
q->duplicate = 0;
@@ -509,6 +509,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (skb->ip_summed == CHECKSUM_PARTIAL &&
skb_checksum_help(skb)) {
qdisc_drop(skb, sch, to_free);
+ skb = NULL;
goto finish_segs;
}
@@ -593,9 +594,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
finish_segs:
if (segs) {
unsigned int len, last_len;
- int nb = 0;
+ int nb;
- len = skb->len;
+ len = skb ? skb->len : 0;
+ nb = skb ? 1 : 0;
while (segs) {
skb2 = segs->next;
@@ -612,7 +614,10 @@ finish_segs:
}
segs = skb2;
}
- qdisc_tree_reduce_backlog(sch, -nb, prev_len - len);
+ /* Parent qdiscs accounted for 1 skb of size @prev_len */
+ qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
+ } else if (!skb) {
+ return NET_XMIT_DROP;
}
return NET_XMIT_SUCCESS;
}
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 0f8fedb8809a..647941702f9f 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -292,8 +292,14 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct tc_prio_qopt_offload graft_offload;
unsigned long band = arg - 1;
- if (new == NULL)
- new = &noop_qdisc;
+ if (!new) {
+ new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle, arg), extack);
+ if (!new)
+ new = &noop_qdisc;
+ else
+ qdisc_hash_add(new, true);
+ }
*old = qdisc_replace(sch, new, &q->queues[band]);
@@ -356,7 +362,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
cl_q = q->queues[cl - 1];
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
- d, NULL, &cl_q->bstats) < 0 ||
+ d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
qdisc_qstats_copy(d, cl_q) < 0)
return -1;
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 1dff8506a715..4074c50ac3d7 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -18,7 +18,7 @@
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/random.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <net/ip.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
@@ -45,7 +45,7 @@ struct sfb_bucket {
* (Section 4.4 of SFB reference : moving hash functions)
*/
struct sfb_bins {
- u32 perturbation; /* jhash perturbation */
+ siphash_key_t perturbation; /* siphash key */
struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS];
};
@@ -217,7 +217,8 @@ static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_da
static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q)
{
- q->bins[slot].perturbation = prandom_u32();
+ get_random_bytes(&q->bins[slot].perturbation,
+ sizeof(q->bins[slot].perturbation));
}
static void sfb_swap_slot(struct sfb_sched_data *q)
@@ -314,9 +315,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* If using external classifiers, get result and record it. */
if (!sfb_classify(skb, fl, &ret, &salt))
goto other_drop;
- sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
+ sfbhash = siphash_1u32(salt, &q->bins[slot].perturbation);
} else {
- sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
+ sfbhash = skb_get_hash_perturb(skb, &q->bins[slot].perturbation);
}
@@ -352,7 +353,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* Inelastic flow */
if (q->double_buffering) {
sfbhash = skb_get_hash_perturb(skb,
- q->bins[slot].perturbation);
+ &q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -488,7 +489,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct sfb_sched_data *q = qdisc_priv(sch);
- struct Qdisc *child;
+ struct Qdisc *child, *old;
struct nlattr *tb[TCA_SFB_MAX + 1];
const struct tc_sfb_qopt *ctl = &sfb_default_ops;
u32 limit;
@@ -518,8 +519,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
qdisc_hash_add(child, true);
sch_tree_lock(sch);
- qdisc_tree_flush_backlog(q->qdisc);
- qdisc_put(q->qdisc);
+ qdisc_purge_queue(q->qdisc);
+ old = q->qdisc;
q->qdisc = child;
q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval);
@@ -542,6 +543,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
sfb_init_perturbation(1, q);
sch_tree_unlock(sch);
+ qdisc_put(old);
return 0;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 68404a9d2ce4..5a6def5e4e6d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -14,7 +14,7 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/skbuff.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/netlink.h>
@@ -117,7 +117,7 @@ struct sfq_sched_data {
u8 headdrop;
u8 maxdepth; /* limit of packets per flow */
- u32 perturbation;
+ siphash_key_t perturbation;
u8 cur_depth; /* depth of longest slot */
u8 flags;
unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
@@ -157,7 +157,7 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
static unsigned int sfq_hash(const struct sfq_sched_data *q,
const struct sk_buff *skb)
{
- return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
+ return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1);
}
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -607,9 +607,11 @@ static void sfq_perturbation(struct timer_list *t)
struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
struct Qdisc *sch = q->sch;
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ siphash_key_t nkey;
+ get_random_bytes(&nkey, sizeof(nkey));
spin_lock(root_lock);
- q->perturbation = prandom_u32();
+ q->perturbation = nkey;
if (!q->filter_list && q->tail)
sfq_rehash(sch);
spin_unlock(root_lock);
@@ -635,6 +637,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
if (ctl->divisor &&
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
return -EINVAL;
+
+ /* slot->allot is a short, make sure quantum is not too big. */
+ if (ctl->quantum) {
+ unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
+
+ if (scaled <= 0 || scaled > SHRT_MAX)
+ return -EINVAL;
+ }
+
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog))
return -EINVAL;
@@ -688,7 +699,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
del_timer(&q->perturb_timer);
if (q->perturb_period) {
mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
- q->perturbation = prandom_u32();
+ get_random_bytes(&q->perturbation, sizeof(q->perturbation));
}
sch_tree_unlock(sch);
kfree(p);
@@ -745,7 +756,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
q->quantum = psched_mtu(qdisc_dev(sch));
q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
q->perturb_period = 0;
- q->perturbation = prandom_u32();
+ get_random_bytes(&q->perturbation, sizeof(q->perturbation));
if (opt) {
int err = sfq_change(sch, opt);
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 0fb10abf7579..7a5e4c454715 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -169,6 +169,9 @@ static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
{
struct tc_skbprio_qopt *ctl = nla_data(opt);
+ if (opt->nla_len != nla_attr_size(sizeof(*ctl)))
+ return -EINVAL;
+
sch->limit = ctl->limit;
return 0;
}
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 2f2967dcf15a..bc18daa3ba51 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -234,8 +234,10 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
prio = skb->priority;
tc = netdev_get_prio_tc_map(dev, prio);
- if (!(gate_mask & BIT(tc)))
+ if (!(gate_mask & BIT(tc))) {
+ skb = NULL;
continue;
+ }
len = qdisc_pkt_len(skb);
guard = ktime_add_ns(q->get_time(),
@@ -245,13 +247,17 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
* guard band ...
*/
if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
- ktime_after(guard, entry->close_time))
+ ktime_after(guard, entry->close_time)) {
+ skb = NULL;
continue;
+ }
/* ... and no budget. */
if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
- atomic_sub_return(len, &entry->budget) < 0)
+ atomic_sub_return(len, &entry->budget) < 0) {
+ skb = NULL;
continue;
+ }
skb = child->ops->dequeue(child);
if (unlikely(!skb))
@@ -552,7 +558,7 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
}
/* Verify priority mapping uses valid tcs */
- for (i = 0; i < TC_BITMASK + 1; i++) {
+ for (i = 0; i <= TC_BITMASK; i++) {
if (qopt->prio_tc_map[i] >= qopt->num_tc) {
NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
return -EINVAL;
@@ -709,6 +715,26 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
return NOTIFY_DONE;
}
+static int taprio_mqprio_cmp(const struct net_device *dev,
+ const struct tc_mqprio_qopt *mqprio)
+{
+ int i;
+
+ if (!mqprio || mqprio->num_tc != dev->num_tc)
+ return -1;
+
+ for (i = 0; i < mqprio->num_tc; i++)
+ if (dev->tc_to_txq[i].count != mqprio->count[i] ||
+ dev->tc_to_txq[i].offset != mqprio->offset[i])
+ return -1;
+
+ for (i = 0; i <= TC_BITMASK; i++)
+ if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i])
+ return -1;
+
+ return 0;
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -745,6 +771,10 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
admin = rcu_dereference(q->admin_sched);
rcu_read_unlock();
+ /* no changes - no new mqprio settings */
+ if (!taprio_mqprio_cmp(dev, mqprio))
+ mqprio = NULL;
+
if (mqprio && (oper || admin)) {
NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
err = -ENOTSUPP;
@@ -801,7 +831,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
mqprio->offset[i]);
/* Always use supplied priority mappings */
- for (i = 0; i < TC_BITMASK + 1; i++)
+ for (i = 0; i <= TC_BITMASK; i++)
netdev_set_prio_tc_map(dev, i,
mqprio->prio_tc_map[i]);
}
@@ -877,7 +907,7 @@ static void taprio_destroy(struct Qdisc *sch)
}
q->qdiscs = NULL;
- netdev_set_num_tc(dev, 0);
+ netdev_reset_tc(dev);
if (q->oper_sched)
call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 689ef6f3ded8..2f1f0a378408 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -239,7 +239,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
char haddr[MAX_ADDR_LEN];
neigh_ha_snapshot(haddr, n, dev);
- err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
+ err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
haddr, NULL, skb->len);
if (err < 0)