diff options
Diffstat (limited to 'net/netfilter')
78 files changed, 2475 insertions, 1204 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ef72819d9d31..d569915da003 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -118,7 +118,6 @@ config NF_CONNTRACK_ZONES config NF_CONNTRACK_PROCFS bool "Supply CT list in procfs (OBSOLETE)" - default y depends on PROC_FS ---help--- This option enables for the list of known conntrack entries diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 451b2df998ea..7c38b8fdc7e0 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -290,12 +290,6 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum, if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum)) return NULL; return net->nf.hooks_ipv6 + hooknum; -#if IS_ENABLED(CONFIG_DECNET) - case NFPROTO_DECNET: - if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum)) - return NULL; - return net->nf.hooks_decnet + hooknum; -#endif default: WARN_ON_ONCE(1); return NULL; @@ -577,9 +571,11 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct) rcu_read_lock(); ct_hook = rcu_dereference(nf_ct_hook); - BUG_ON(ct_hook == NULL); - ct_hook->destroy(nfct); + if (ct_hook) + ct_hook->destroy(nfct); rcu_read_unlock(); + + WARN_ON(!ct_hook); } EXPORT_SYMBOL(nf_conntrack_destroy); @@ -625,10 +621,6 @@ static int __net_init netfilter_net_init(struct net *net) #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge)); #endif -#if IS_ENABLED(CONFIG_DECNET) - __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet)); -#endif - #ifdef CONFIG_PROC_FS net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", net->proc_net); diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index bfd4b42ba305..288675ce22d0 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -28,6 +28,7 @@ #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_list IPSET_TOKEN(MTYPE, _list) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype MTYPE #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) @@ -57,9 +58,6 @@ mtype_destroy(struct ip_set *set) { struct mtype *map = set->data; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); ip_set_free(map->members); @@ -288,6 +286,15 @@ mtype_gc(struct timer_list *t) add_timer(&map->gc); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct mtype *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant mtype = { .kadt = mtype_kadt, .uadt = mtype_uadt, @@ -301,6 +308,7 @@ static const struct ip_set_type_variant mtype = { .head = mtype_head, .list = mtype_list, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, }; #endif /* __IP_SET_BITMAP_IP_GEN_H */ diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 6e3cf4d19ce8..194d775ce4cc 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -296,8 +296,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_BITMAP_RANGE; pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask); - hosts = 2 << (32 - netmask - 1); - elements = 2 << (netmask - mask_bits - 1); + hosts = 2U << (32 - netmask - 1); + elements = 2UL << (netmask - mask_bits - 1); } if (elements > IPSET_BITMAP_MAX_RANGE + 1) return -IPSET_ERR_BITMAP_RANGE_SIZE; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 16ae770f049d..544106475d4f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); ip_set_dereference((inst)->ip_set_list)[id] #define ip_set_ref_netlink(inst,id) \ rcu_dereference_raw((inst)->ip_set_list)[id] +#define ip_set_dereference_nfnl(p) \ + rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) /* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is @@ -530,6 +532,14 @@ __ip_set_put(struct ip_set *set) /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need * a separate reference counter */ +static void +__ip_set_get_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + set->ref_netlink++; + write_unlock_bh(&ip_set_ref_lock); +} + static inline void __ip_set_put_netlink(struct ip_set *set) { @@ -548,15 +558,10 @@ __ip_set_put_netlink(struct ip_set *set) static inline struct ip_set * ip_set_rcu_get(struct net *net, ip_set_id_t index) { - struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); - rcu_read_lock(); - /* ip_set_list itself needs to be protected */ - set = rcu_dereference(inst->ip_set_list)[index]; - rcu_read_unlock(); - - return set; + /* ip_set_list and the set pointer need to be protected */ + return ip_set_dereference_nfnl(inst->ip_set_list)[index]; } static inline void @@ -811,20 +816,9 @@ static struct nlmsghdr * start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, enum ipset_cmd cmd) { - struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; - - nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), - sizeof(*nfmsg), flags); - if (!nlh) - return NULL; - - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = NFPROTO_IPV4; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - - return nlh; + return nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), flags, + NFPROTO_IPV4, NFNETLINK_V0, 0); } /* Create a set */ @@ -1012,6 +1006,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl, return ret; cleanup: + set->variant->cancel_gc(set); set->variant->destroy(set); put_out: module_put(set->type->me); @@ -1040,6 +1035,14 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } +static void +ip_set_destroy_set_rcu(struct rcu_head *head) +{ + struct ip_set *set = container_of(head, struct ip_set, rcu); + + ip_set_destroy_set(set); +} + static int ip_set_destroy(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], @@ -1053,8 +1056,6 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); /* Commands are serialized and references are * protected by the ip_set_ref_lock. @@ -1066,8 +1067,10 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, * counter, so if it's already zero, we can proceed * without holding the lock. */ - read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { @@ -1081,12 +1084,17 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, s = ip_set(inst, i); if (s) { ip_set(inst, i) = NULL; + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); ip_set_destroy_set(s); } } /* Modified by ip_set_destroy() only, which is serialized */ inst->is_destroyed = false; } else { + u16 features = 0; + + read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { @@ -1096,10 +1104,16 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, ret = -IPSET_ERR_BUSY; goto out; } + features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - - ip_set_destroy_set(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; out: @@ -1539,6 +1553,14 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { + if (retried) { + __ip_set_get_netlink(set); + nfnl_unlock(NFNL_SUBSYS_IPSET); + cond_resched(); + nfnl_lock(NFNL_SUBSYS_IPSET); + __ip_set_put_netlink(set); + } + ip_set_lock(set); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); ip_set_unlock(set); @@ -2215,6 +2237,7 @@ ip_set_net_exit(struct net *net) set = ip_set(inst, i); if (set) { ip_set(inst, i) = NULL; + set->variant->cancel_gc(set); ip_set_destroy_set(set); } } @@ -2262,8 +2285,11 @@ ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - unregister_pernet_subsys(&ip_set_net_ops); + + /* Wait for call_rcu() in destroy */ + rcu_barrier(); + pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 1b44dfa7ba85..4346cae25a4a 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -235,6 +235,7 @@ htable_size(u8 hbits) #undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init +#undef mtype_cancel_gc #undef mtype_variant #undef mtype_data_match @@ -279,6 +280,7 @@ htable_size(u8 hbits) #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) @@ -444,7 +446,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) u32 i; for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference(hbucket(t, i)); + n = (__force struct hbucket *)hbucket(t, i); if (!n) continue; if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) @@ -464,10 +466,7 @@ mtype_destroy(struct ip_set *set) struct htype *h = set->data; struct list_head *l, *lt; - if (SET_WITH_TIMEOUT(set)) - cancel_delayed_work_sync(&h->gc.dwork); - - mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); + mtype_ahash_destroy(set, (__force struct htable *)h->table, true); list_for_each_safe(l, lt, &h->ad) { list_del(l); kfree(l); @@ -613,6 +612,15 @@ mtype_gc_init(struct htable_gc *gc) queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct htype *h = set->data; + + if (SET_WITH_TIMEOUT(set)) + cancel_delayed_work_sync(&h->gc.dwork); +} + static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags); @@ -1433,6 +1441,7 @@ static const struct ip_set_type_variant mtype_variant = { .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, .region_lock = true, }; diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index a82b70e8b9a6..aed319815358 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -35,6 +35,7 @@ MODULE_ALIAS("ip_set_hash:net,port,net"); #define IP_SET_HASH_WITH_PROTO #define IP_SET_HASH_WITH_NETS #define IPSET_NET_COUNT 2 +#define IP_SET_HASH_WITH_NET0 /* IPv4 variant */ diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 63908123f7ba..64cc3e2131f3 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set) struct list_set *map = set->data; struct set_elem *e, *n; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - list_for_each_entry_safe(e, n, &map->members, list) { list_del(&e->list); ip_set_put_byindex(map->net, e->id); @@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } +static void +list_set_cancel_gc(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant set_variant = { .kadt = list_set_kadt, .uadt = list_set_uadt, @@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = { .head = list_set_head, .list = list_set_list, .same_set = list_set_same_set, + .cancel_gc = list_set_cancel_gc, }; static void diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index f9b16f2b2219..fdacbc3c15be 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -599,13 +599,19 @@ static const struct seq_operations ip_vs_app_seq_ops = { int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) { INIT_LIST_HEAD(&ipvs->app_list); - proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops, - sizeof(struct seq_net_private)); +#ifdef CONFIG_PROC_FS + if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, + &ip_vs_app_seq_ops, + sizeof(struct seq_net_private))) + return -ENOMEM; +#endif return 0; } void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) { unregister_ip_vs_app(ipvs, NULL /* all */); +#ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_app", ipvs->net->proc_net); +#endif } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a189079a6ea5..d66548d2e5de 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1225,8 +1225,8 @@ static inline int todrop_entry(struct ip_vs_conn *cp) * The drop rate array needs tuning for real environments. * Called from timer bh only => no locking */ - static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; - static char todrop_counter[9] = {0}; + static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + static signed char todrop_counter[9] = {0}; int i; /* if the conn entry hasn't lasted for 60 seconds, don't drop it. @@ -1373,20 +1373,36 @@ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs) { atomic_set(&ipvs->conn_count, 0); - proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, - &ip_vs_conn_seq_ops, sizeof(struct ip_vs_iter_state)); - proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net, - &ip_vs_conn_sync_seq_ops, - sizeof(struct ip_vs_iter_state)); +#ifdef CONFIG_PROC_FS + if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, + &ip_vs_conn_seq_ops, + sizeof(struct ip_vs_iter_state))) + goto err_conn; + + if (!proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net, + &ip_vs_conn_sync_seq_ops, + sizeof(struct ip_vs_iter_state))) + goto err_conn_sync; +#endif + return 0; + +#ifdef CONFIG_PROC_FS +err_conn_sync: + remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); +err_conn: + return -ENOMEM; +#endif } void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs) { /* flush all the connection entries first */ ip_vs_conn_flush(ipvs); +#ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net); +#endif } int __init ip_vs_conn_init(void) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 07242503d74d..2bc82dabfe3b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1759,6 +1759,7 @@ static int proc_do_sync_threshold(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct netns_ipvs *ipvs = table->extra2; int *valp = table->data; int val[2]; int rc; @@ -1768,6 +1769,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write, .mode = table->mode, }; + mutex_lock(&ipvs->sync_mutex); memcpy(val, valp, sizeof(val)); rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); if (write) { @@ -1777,6 +1779,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write, else memcpy(valp, val, sizeof(val)); } + mutex_unlock(&ipvs->sync_mutex); return rc; } @@ -4034,6 +4037,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; tbl[idx].data = &ipvs->sysctl_sync_threshold; + tbl[idx].extra2 = ipvs; tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD; tbl[idx++].data = &ipvs->sysctl_sync_refresh_period; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index a0921adc31a9..1e689c714127 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -126,7 +126,8 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (sctph->source != cp->vport || payload_csum || skb->ip_summed == CHECKSUM_PARTIAL) { sctph->source = cp->vport; - sctp_nat_csum(skb, sctph, sctphoff); + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + sctp_nat_csum(skb, sctph, sctphoff); } else { skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -174,7 +175,8 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, (skb->ip_summed == CHECKSUM_PARTIAL && !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; - sctp_nat_csum(skb, sctph, sctphoff); + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_UNNECESSARY; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 0c1bc654245c..fb1dc205e3b5 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1444,7 +1444,7 @@ static int bind_mcastif_addr(struct socket *sock, struct net_device *dev) sin.sin_addr.s_addr = addr; sin.sin_port = 0; - return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin)); + return kernel_bind(sock, (struct sockaddr *)&sin, sizeof(sin)); } static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, @@ -1510,8 +1510,8 @@ static int make_send_sock(struct netns_ipvs *ipvs, int id, } get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id); - result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, - salen, 0); + result = kernel_connect(sock, (struct sockaddr *)&mcast_addr, + salen, 0); if (result < 0) { pr_err("Error connecting to the multicast addr\n"); goto error; @@ -1551,7 +1551,7 @@ static int make_receive_sock(struct netns_ipvs *ipvs, int id, get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id); sock->sk->sk_bound_dev_if = dev->ifindex; - result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen); + result = kernel_bind(sock, (struct sockaddr *)&mcast_addr, salen); if (result < 0) { pr_err("Error binding to the multicast addr\n"); goto error; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index cefc39878b1a..5c81772158d8 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -271,7 +271,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); return false; } @@ -286,7 +286,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, { if (ip_hdr(skb)->ttl <= 1) { /* Tell the sender its packet died... */ - __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); return false; } @@ -1231,6 +1231,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->transport_header = skb->network_header; skb_set_inner_ipproto(skb, next_protocol); + skb_set_inner_mac_header(skb, skb_inner_network_offset(skb)); if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { bool check = false; @@ -1379,6 +1380,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->transport_header = skb->network_header; skb_set_inner_ipproto(skb, next_protocol); + skb_set_inner_mac_header(skb, skb_inner_network_offset(skb)); if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { bool check = false; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index d9b6f2001d00..fb1f12bb0ff2 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1943,6 +1943,9 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct, return 0; helper = rcu_dereference(help->helper); + if (!helper) + return 0; + if (!(helper->flags & NF_CT_HELPER_F_USERSPACE)) return 0; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 9eca90414bb7..b22801f97bce 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -382,7 +382,7 @@ static int help(struct sk_buff *skb, int ret; u32 seq; int dir = CTINFO2DIR(ctinfo); - unsigned int uninitialized_var(matchlen), uninitialized_var(matchoff); + unsigned int matchlen, matchoff; struct nf_ct_ftp_master *ct_ftp_info = nfct_help_data(ct); struct nf_conntrack_expect *exp; union nf_inet_addr *daddr; diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 573cb4481481..814857ae3b81 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -533,6 +533,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, /* Get fields bitmap */ if (nf_h323_error_boundary(bs, 0, f->sz)) return H323_ERROR_BOUND; + if (f->sz > 32) + return H323_ERROR_RANGE; bmp = get_bitmap(bs, f->sz); if (base) *(unsigned int *)base = bmp; @@ -589,6 +591,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, bmp2_len = get_bits(bs, 7) + 1; if (nf_h323_error_boundary(bs, 0, bmp2_len)) return H323_ERROR_BOUND; + if (bmp2_len > 32) + return H323_ERROR_RANGE; bmp2 = get_bitmap(bs, bmp2_len); bmp |= bmp2 >> f->sz; if (base) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 118f415928ae..32cc91f5ba99 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -404,6 +404,9 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); + if (!nf_ct_helper_hash) + return -ENOENT; + if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) return -EINVAL; @@ -587,4 +590,5 @@ void nf_conntrack_helper_fini(void) { nf_ct_extend_unregister(&helper_extend); kvfree(nf_ct_helper_hash); + nf_ct_helper_hash = NULL; } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index e40988a2f22f..65b5b05fe38d 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -148,15 +148,37 @@ static int help(struct sk_buff *skb, unsigned int protoff, data = ib_ptr; data_limit = ib_ptr + skb->len - dataoff; - /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 - * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ - while (data < data_limit - (19 + MINMATCHLEN)) { - if (memcmp(data, "\1DCC ", 5)) { + /* Skip any whitespace */ + while (data < data_limit - 10) { + if (*data == ' ' || *data == '\r' || *data == '\n') + data++; + else + break; + } + + /* strlen("PRIVMSG x ")=10 */ + if (data < data_limit - 10) { + if (strncasecmp("PRIVMSG ", data, 8)) + goto out; + data += 8; + } + + /* strlen(" :\1DCC SENT t AAAAAAAA P\1\n")=26 + * 7+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=26 + */ + while (data < data_limit - (21 + MINMATCHLEN)) { + /* Find first " :", the start of message */ + if (memcmp(data, " :", 2)) { data++; continue; } + data += 2; + + /* then check that place only for the DCC command */ + if (memcmp(data, "\1DCC ", 5)) + goto out; data += 5; - /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ + /* we have at least (21+MINMATCHLEN)-(2+5) bytes valid data left */ iph = ip_hdr(skb); pr_debug("DCC found in master %pI4:%u %pI4:%u\n", @@ -172,7 +194,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, pr_debug("DCC %s detected\n", dccprotos[i]); /* we have at least - * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid + * (21+MINMATCHLEN)-7-dccprotos[i].matchlen bytes valid * data left (== 14/13 bytes) */ if (parse_dcc(data, data_limit, &dcc_ip, &dcc_port, &addr_beg_p, &addr_end_p)) { @@ -185,8 +207,9 @@ static int help(struct sk_buff *skb, unsigned int protoff, /* dcc_ip can be the internal OR external (NAT'ed) IP */ tuple = &ct->tuplehash[dir].tuple; - if (tuple->src.u3.ip != dcc_ip && - tuple->dst.u3.ip != dcc_ip) { + if ((tuple->src.u3.ip != dcc_ip && + ct->tuplehash[!dir].tuple.dst.u3.ip != dcc_ip) || + dcc_port == 0) { net_warn_ratelimited("Forged DCC command from %pI4: %pI4:%u\n", &tuple->src.u3.ip, &dcc_ip, dcc_port); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index bc6f0c8874f8..45d02185f4b9 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -515,20 +515,15 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, { const struct nf_conntrack_zone *zone; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct nlattr *nest_parms; unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, nf_ct_l3num(ct), + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = nf_ct_l3num(ct); - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); @@ -685,7 +680,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) const struct nf_conntrack_zone *zone; struct net *net; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct nlattr *nest_parms; struct nf_conn *ct = item->ct; struct sk_buff *skb; @@ -715,15 +709,11 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto errout; type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, type); - nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, item->portid, 0, type, flags, nf_ct_l3num(ct), + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = nf_ct_l3num(ct); - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); @@ -1229,9 +1219,6 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) { - if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) - return 0; - return ctnetlink_filter_match(ct, data); } @@ -1294,11 +1281,6 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, ct = nf_ct_tuplehash_to_ctrack(h); - if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) { - nf_ct_put(ct); - return -EBUSY; - } - if (cda[CTA_ID]) { __be32 id = nla_get_be32(cda[CTA_ID]); @@ -2086,12 +2068,15 @@ ctnetlink_create_conntrack(struct net *net, err = nf_conntrack_hash_check_insert(ct); if (err < 0) - goto err2; + goto err3; rcu_read_unlock(); return ct; +err3: + if (ct->master) + nf_ct_put(ct->master); err2: rcu_read_unlock(); err1: @@ -2205,20 +2190,15 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, __u16 cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS_CPU); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, htons(cpu)); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(cpu); - if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) || nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) || @@ -2289,20 +2269,15 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct net *net) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; unsigned int nr_conntracks = atomic_read(&net->ct.count); event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks))) goto nla_put_failure; @@ -2709,7 +2684,9 @@ nla_put_failure: return -1; } +#if IS_ENABLED(CONFIG_NF_NAT) static const union nf_inet_addr any_addr; +#endif static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) { @@ -2806,19 +2783,14 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, const struct nf_conntrack_expect *exp) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, + exp->tuple.src.l3num, NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = exp->tuple.src.l3num; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nla_put_failure; @@ -2838,7 +2810,6 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) struct nf_conntrack_expect *exp = item->exp; struct net *net = nf_ct_exp_net(exp); struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct sk_buff *skb; unsigned int type, group; int flags = 0; @@ -2861,15 +2832,11 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) goto errout; type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, type); - nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, item->portid, 0, type, flags, + exp->tuple.src.l3num, NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = exp->tuple.src.l3num; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nla_put_failure; @@ -3209,10 +3176,12 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, return 0; } +#if IS_ENABLED(CONFIG_NF_NAT) static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = { [CTA_EXPECT_NAT_DIR] = { .type = NLA_U32 }, [CTA_EXPECT_NAT_TUPLE] = { .type = NLA_NESTED }, }; +#endif static int ctnetlink_parse_expect_nat(const struct nlattr *attr, @@ -3437,20 +3406,15 @@ ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_EXP_GET_STATS_CPU); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, htons(cpu)); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(cpu); - if (nla_put_be32(skb, CTA_STATS_EXP_NEW, htonl(st->expect_new)) || nla_put_be32(skb, CTA_STATS_EXP_CREATE, htonl(st->expect_create)) || nla_put_be32(skb, CTA_STATS_EXP_DELETE, htonl(st->expect_delete))) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index b3f4a334f9d7..67b8dedef293 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -430,9 +430,19 @@ static bool dccp_error(const struct dccp_hdr *dh, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { + static const unsigned long require_seq48 = 1 << DCCP_PKT_REQUEST | + 1 << DCCP_PKT_RESPONSE | + 1 << DCCP_PKT_CLOSEREQ | + 1 << DCCP_PKT_CLOSE | + 1 << DCCP_PKT_RESET | + 1 << DCCP_PKT_SYNC | + 1 << DCCP_PKT_SYNCACK; unsigned int dccp_len = skb->len - dataoff; unsigned int cscov; const char *msg; + u8 type; + + BUILD_BUG_ON(DCCP_PKT_INVALID >= BITS_PER_LONG); if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) || dh->dccph_doff * 4 > dccp_len) { @@ -457,10 +467,17 @@ static bool dccp_error(const struct dccp_hdr *dh, goto out_invalid; } - if (dh->dccph_type >= DCCP_PKT_INVALID) { + type = dh->dccph_type; + if (type >= DCCP_PKT_INVALID) { msg = "nf_ct_dccp: reserved packet type "; goto out_invalid; } + + if (test_bit(type, &require_seq48) && !dh->dccph_x) { + msg = "nf_ct_dccp: type lacks 48bit sequence numbers"; + goto out_invalid; + } + return false; out_invalid: nf_l4proto_log_invalid(skb, state->net, state->pf, @@ -468,24 +485,53 @@ out_invalid: return true; } +struct nf_conntrack_dccp_buf { + struct dccp_hdr dh; /* generic header part */ + struct dccp_hdr_ext ext; /* optional depending dh->dccph_x */ + union { /* depends on header type */ + struct dccp_hdr_ack_bits ack; + struct dccp_hdr_request req; + struct dccp_hdr_response response; + struct dccp_hdr_reset rst; + } u; +}; + +static struct dccp_hdr * +dccp_header_pointer(const struct sk_buff *skb, int offset, const struct dccp_hdr *dh, + struct nf_conntrack_dccp_buf *buf) +{ + unsigned int hdrlen = __dccp_hdr_len(dh); + + if (hdrlen > sizeof(*buf)) + return NULL; + + return skb_header_pointer(skb, offset, hdrlen, buf); +} + int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - struct dccp_hdr _dh, *dh; + struct nf_conntrack_dccp_buf _dh; u_int8_t type, old_state, new_state; enum ct_dccp_roles role; unsigned int *timeouts; + struct dccp_hdr *dh; - dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); + dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh); if (!dh) return NF_DROP; if (dccp_error(dh, skb, dataoff, state)) return -NF_ACCEPT; + /* pull again, including possible 48 bit sequences and subtype header */ + dh = dccp_header_pointer(skb, dataoff, dh, &_dh); + if (!dh) + return NF_DROP; + type = dh->dccph_type; if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh)) return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index 6f9144e1f1c1..ee45dbf1b035 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -128,6 +128,56 @@ static void icmpv6_error_log(const struct sk_buff *skb, IPPROTO_ICMPV6, "%s", msg); } +static noinline_for_stack int +nf_conntrack_icmpv6_redirect(struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state) +{ + u8 hl = ipv6_hdr(skb)->hop_limit; + union nf_inet_addr outer_daddr; + union { + struct nd_opt_hdr nd_opt; + struct rd_msg rd_msg; + } tmp; + const struct nd_opt_hdr *nd_opt; + const struct rd_msg *rd_msg; + + rd_msg = skb_header_pointer(skb, dataoff, sizeof(*rd_msg), &tmp.rd_msg); + if (!rd_msg) { + icmpv6_error_log(skb, state, "short redirect"); + return -NF_ACCEPT; + } + + if (rd_msg->icmph.icmp6_code != 0) + return NF_ACCEPT; + + if (hl != 255 || !(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { + icmpv6_error_log(skb, state, "invalid saddr or hoplimit for redirect"); + return -NF_ACCEPT; + } + + dataoff += sizeof(*rd_msg); + + /* warning: rd_msg no longer usable after this call */ + nd_opt = skb_header_pointer(skb, dataoff, sizeof(*nd_opt), &tmp.nd_opt); + if (!nd_opt || nd_opt->nd_opt_len == 0) { + icmpv6_error_log(skb, state, "redirect without options"); + return -NF_ACCEPT; + } + + /* We could call ndisc_parse_options(), but it would need + * skb_linearize() and a bit more work. + */ + if (nd_opt->nd_opt_type != ND_OPT_REDIRECT_HDR) + return NF_ACCEPT; + + memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr, + sizeof(outer_daddr.ip6)); + dataoff += 8; + return nf_conntrack_inet_error(tmpl, skb, dataoff, state, + IPPROTO_ICMPV6, &outer_daddr); +} + int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, @@ -158,6 +208,9 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, return NF_ACCEPT; } + if (icmp6h->icmp6_type == NDISC_REDIRECT) + return nf_conntrack_icmpv6_redirect(tmpl, skb, dataoff, state); + /* is not error message ? */ if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 7626f3e1c70a..6b2a215b2786 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -27,22 +27,16 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_timeout.h> -/* FIXME: Examine ipfilter's timeouts and conntrack transitions more - closely. They're more complex. --RR - - And so for me for SCTP :D -Kiran */ - static const char *const sctp_conntrack_names[] = { - "NONE", - "CLOSED", - "COOKIE_WAIT", - "COOKIE_ECHOED", - "ESTABLISHED", - "SHUTDOWN_SENT", - "SHUTDOWN_RECD", - "SHUTDOWN_ACK_SENT", - "HEARTBEAT_SENT", - "HEARTBEAT_ACKED", + [SCTP_CONNTRACK_NONE] = "NONE", + [SCTP_CONNTRACK_CLOSED] = "CLOSED", + [SCTP_CONNTRACK_COOKIE_WAIT] = "COOKIE_WAIT", + [SCTP_CONNTRACK_COOKIE_ECHOED] = "COOKIE_ECHOED", + [SCTP_CONNTRACK_ESTABLISHED] = "ESTABLISHED", + [SCTP_CONNTRACK_SHUTDOWN_SENT] = "SHUTDOWN_SENT", + [SCTP_CONNTRACK_SHUTDOWN_RECD] = "SHUTDOWN_RECD", + [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = "SHUTDOWN_ACK_SENT", + [SCTP_CONNTRACK_HEARTBEAT_SENT] = "HEARTBEAT_SENT", }; #define SECS * HZ @@ -54,12 +48,11 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { [SCTP_CONNTRACK_CLOSED] = 10 SECS, [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS, [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS, - [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS, - [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000, - [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000, + [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS, + [SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS, + [SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS, [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, - [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS, }; #define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1 @@ -73,7 +66,6 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT #define sHS SCTP_CONNTRACK_HEARTBEAT_SENT -#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED #define sIV SCTP_CONNTRACK_MAX /* @@ -96,9 +88,6 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of the SHUTDOWN chunk. Connection is closed. HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow. -HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to - that of the HEARTBEAT chunk. Secondary connection is - established. */ /* TODO @@ -115,33 +104,33 @@ cookie echoed to closed. static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { { /* ORIGINAL */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ -/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA}, -/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA}, -/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, -/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS}, -/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA}, -/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/ -/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */ -/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA}, -/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, -/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA} +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ +/* init */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW}, +/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL}, +/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, +/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL}, +/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA}, +/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/ +/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */ +/* cookie_ack */ {sCL, sCL, sCW, sES, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ +/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL}, +/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, +/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, }, { /* REPLY */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ -/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */ -/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA}, -/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL}, -/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR}, -/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA}, -/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */ -/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA}, -/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA}, -/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, -/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA} +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ +/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* INIT in sCL Big TODO */ +/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV}, +/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV}, +/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV}, +/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV}, +/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV}, +/* cookie_echo */ {sIV, sCL, sCE, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ +/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV}, +/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV}, +/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, +/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sES}, } }; @@ -310,7 +299,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; - } else { + } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) { /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ pr_debug("Setting vtag %x for new conn OOTB\n", @@ -412,24 +401,34 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { /* Special cases of Verification tag check (Sec 8.5.1) */ if (sch->type == SCTP_CID_INIT) { - /* Sec 8.5.1 (A) */ + /* (A) vtag MUST be zero */ if (sh->vtag != 0) goto out_unlock; } else if (sch->type == SCTP_CID_ABORT) { - /* Sec 8.5.1 (B) */ - if (sh->vtag != ct->proto.sctp.vtag[dir] && - sh->vtag != ct->proto.sctp.vtag[!dir]) + /* (B) vtag MUST match own vtag if T flag is unset OR + * MUST match peer's vtag if T flag is set + */ + if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[dir]) || + ((sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[!dir])) goto out_unlock; } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { - /* Sec 8.5.1 (C) */ - if (sh->vtag != ct->proto.sctp.vtag[dir] && - sh->vtag != ct->proto.sctp.vtag[!dir] && - sch->flags & SCTP_CHUNK_FLAG_T) + /* (C) vtag MUST match own vtag if T flag is unset OR + * MUST match peer's vtag if T flag is set + */ + if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[dir]) || + ((sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[!dir])) goto out_unlock; } else if (sch->type == SCTP_CID_COOKIE_ECHO) { - /* Sec 8.5.1 (D) */ + /* (D) vtag must be same as init_vtag as found in INIT_ACK */ if (sh->vtag != ct->proto.sctp.vtag[dir]) goto out_unlock; + } else if (sch->type == SCTP_CID_COOKIE_ACK) { + ct->proto.sctp.init[dir] = 0; + ct->proto.sctp.init[!dir] = 0; } else if (sch->type == SCTP_CID_HEARTBEAT) { if (ct->proto.sctp.vtag[dir] == 0) { pr_debug("Setting %d vtag %x for dir %d\n", sch->type, sh->vtag, dir); @@ -478,16 +477,18 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, } /* If it is an INIT or an INIT ACK note down the vtag */ - if (sch->type == SCTP_CID_INIT || - sch->type == SCTP_CID_INIT_ACK) { - struct sctp_inithdr _inithdr, *ih; + if (sch->type == SCTP_CID_INIT) { + struct sctp_inithdr _ih, *ih; - ih = skb_header_pointer(skb, offset + sizeof(_sch), - sizeof(_inithdr), &_inithdr); - if (ih == NULL) + ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih); + if (!ih) goto out_unlock; - pr_debug("Setting vtag %x for dir %d\n", - ih->init_tag, !dir); + + if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]) + ct->proto.sctp.init[!dir] = 0; + ct->proto.sctp.init[dir] = 1; + + pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; /* don't renew timeout on init retransmit so @@ -498,11 +499,33 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, old_state == SCTP_CONNTRACK_CLOSED && nf_ct_is_confirmed(ct)) ignore = true; + } else if (sch->type == SCTP_CID_INIT_ACK) { + struct sctp_inithdr _ih, *ih; + __be32 vtag; + + ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih); + if (!ih) + goto out_unlock; + + vtag = ct->proto.sctp.vtag[!dir]; + if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag) + goto out_unlock; + /* collision */ + if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] && + vtag != ih->init_tag) + goto out_unlock; + + pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); + ct->proto.sctp.vtag[!dir] = ih->init_tag; } ct->proto.sctp.state = new_state; - if (old_state != new_state) + if (old_state != new_state) { nf_conntrack_event_cache(IPCT_PROTOINFO, ct); + if (new_state == SCTP_CONNTRACK_ESTABLISHED && + !test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) + nf_conntrack_event_cache(IPCT_ASSURED, ct); + } } spin_unlock_bh(&ct->lock); @@ -516,14 +539,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); - if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED && - dir == IP_CT_DIR_REPLY && - new_state == SCTP_CONNTRACK_ESTABLISHED) { - pr_debug("Setting assured bit\n"); - set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_ASSURED, ct); - } - return NF_ACCEPT; out_unlock: diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b8cc3339a249..aed967e2f30f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1158,6 +1158,16 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } + + if (index == TCP_SYN_SET && old_state == TCP_CONNTRACK_SYN_SENT) { + /* do not renew timeout on SYN retransmit. + * + * Else port reuse by client or NAT middlebox can keep + * entry alive indefinitely (including nat info). + */ + return NF_ACCEPT; + } + /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection * pickup with loose=1. Avoid large ESTABLISHED timeout. */ diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index b83dc9bf0a5d..751df19fe0f8 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -477,7 +477,7 @@ static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr, return ret; if (ret == 0) break; - dataoff += *matchoff; + dataoff = *matchoff; } *in_header = 0; } @@ -489,7 +489,7 @@ static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr, break; if (ret == 0) return ret; - dataoff += *matchoff; + dataoff = *matchoff; } if (in_header) @@ -611,7 +611,7 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, start += strlen(name); *val = simple_strtoul(start, &end, 0); if (start == end) - return 0; + return -1; if (matchoff && matchlen) { *matchoff = start - dptr; *matchlen = end - start; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index a3faeacaa1cb..1e3dbed9d784 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -581,7 +581,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT, - NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED, #endif #ifdef CONFIG_NF_CT_PROTO_DCCP NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST, @@ -851,12 +850,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { - .procname = "nf_conntrack_sctp_timeout_heartbeat_acked", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, #endif #ifdef CONFIG_NF_CT_PROTO_DCCP [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = { @@ -985,7 +978,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net, XASSIGN(SHUTDOWN_RECD, sn); XASSIGN(SHUTDOWN_ACK_SENT, sn); XASSIGN(HEARTBEAT_SENT, sn); - XASSIGN(HEARTBEAT_ACKED, sn); #undef XASSIGN #endif } @@ -1188,11 +1180,12 @@ static int __init nf_conntrack_standalone_init(void) nf_conntrack_htable_size_user = nf_conntrack_htable_size; #endif + nf_conntrack_init_end(); + ret = register_pernet_subsys(&nf_conntrack_net_ops); if (ret < 0) goto out_pernet; - nf_conntrack_init_end(); return 0; out_pernet: diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index bb25d4c794c7..e25dbeb220b4 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -203,11 +203,12 @@ void nf_logger_put(int pf, enum nf_log_type type) return; } - BUG_ON(loggers[pf][type] == NULL); - rcu_read_lock(); logger = rcu_dereference(loggers[pf][type]); - module_put(logger->me); + if (!logger) + WARN_ON_ONCE(1); + else + module_put(logger->me); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_logger_put); diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c index f91579c821e9..5b37487d9d11 100644 --- a/net/netfilter/nf_nat_redirect.c +++ b/net/netfilter/nf_nat_redirect.c @@ -10,6 +10,7 @@ #include <linux/if.h> #include <linux/inetdevice.h> +#include <linux/in.h> #include <linux/ip.h> #include <linux/kernel.h> #include <linux/netdevice.h> @@ -24,81 +25,104 @@ #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_redirect.h> +static unsigned int +nf_nat_redirect(struct sk_buff *skb, const struct nf_nat_range2 *range, + const union nf_inet_addr *newdst) +{ + struct nf_nat_range2 newrange; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + + memset(&newrange, 0, sizeof(newrange)); + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr = *newdst; + newrange.max_addr = *newdst; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} + unsigned int -nf_nat_redirect_ipv4(struct sk_buff *skb, - const struct nf_nat_ipv4_multi_range_compat *mr, +nf_nat_redirect_ipv4(struct sk_buff *skb, const struct nf_nat_range2 *range, unsigned int hooknum) { - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be32 newdst; - struct nf_nat_range2 newrange; + union nf_inet_addr newdst = {}; WARN_ON(hooknum != NF_INET_PRE_ROUTING && hooknum != NF_INET_LOCAL_OUT); - ct = nf_ct_get(skb, &ctinfo); - WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED))); - /* Local packets: make them go to loopback */ if (hooknum == NF_INET_LOCAL_OUT) { - newdst = htonl(0x7F000001); + newdst.ip = htonl(INADDR_LOOPBACK); } else { const struct in_device *indev; - newdst = 0; - indev = __in_dev_get_rcu(skb->dev); if (indev) { const struct in_ifaddr *ifa; ifa = rcu_dereference(indev->ifa_list); if (ifa) - newdst = ifa->ifa_local; + newdst.ip = ifa->ifa_local; } - if (!newdst) + if (!newdst.ip) return NF_DROP; } - /* Transfer from original range. */ - memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); - memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); - newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.ip = newdst; - newrange.max_addr.ip = newdst; - newrange.min_proto = mr->range[0].min; - newrange.max_proto = mr->range[0].max; - - /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); + return nf_nat_redirect(skb, range, &newdst); } EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4); static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; +static bool nf_nat_redirect_ipv6_usable(const struct inet6_ifaddr *ifa, unsigned int scope) +{ + unsigned int ifa_addr_type = ipv6_addr_type(&ifa->addr); + + if (ifa_addr_type & IPV6_ADDR_MAPPED) + return false; + + if ((ifa->flags & IFA_F_TENTATIVE) && (!(ifa->flags & IFA_F_OPTIMISTIC))) + return false; + + if (scope) { + unsigned int ifa_scope = ifa_addr_type & IPV6_ADDR_SCOPE_MASK; + + if (!(scope & ifa_scope)) + return false; + } + + return true; +} + unsigned int nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, unsigned int hooknum) { - struct nf_nat_range2 newrange; - struct in6_addr newdst; - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; + union nf_inet_addr newdst = {}; - ct = nf_ct_get(skb, &ctinfo); if (hooknum == NF_INET_LOCAL_OUT) { - newdst = loopback_addr; + newdst.in6 = loopback_addr; } else { + unsigned int scope = ipv6_addr_scope(&ipv6_hdr(skb)->daddr); struct inet6_dev *idev; - struct inet6_ifaddr *ifa; bool addr = false; idev = __in6_dev_get(skb->dev); if (idev != NULL) { + const struct inet6_ifaddr *ifa; + read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { - newdst = ifa->addr; + if (!nf_nat_redirect_ipv6_usable(ifa, scope)) + continue; + + newdst.in6 = ifa->addr; addr = true; break; } @@ -109,12 +133,6 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, return NF_DROP; } - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = newdst; - newrange.max_addr.in6 = newdst; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); + return nf_nat_redirect(skb, range, &newdst); } EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 545da270e802..8131d858f38d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -20,15 +20,22 @@ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_offload.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/sock.h> #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) +#define NFT_SET_MAX_ANONLEN 16 + +unsigned int nf_tables_net_id __read_mostly; +EXPORT_SYMBOL_GPL(nf_tables_net_id); static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); static LIST_HEAD(nf_tables_destroy_list); +static LIST_HEAD(nf_tables_gc_list); static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); +static DEFINE_SPINLOCK(nf_tables_gc_list_lock); static u64 table_handle; enum { @@ -67,7 +74,9 @@ static const struct rhashtable_params nft_objname_ht_params = { static void nft_validate_state_update(struct net *net, u8 new_validate_state) { - switch (net->nft.validate_state) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + switch (nft_net->validate_state) { case NFT_VALIDATE_SKIP: WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO); break; @@ -78,11 +87,14 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state) return; } - net->nft.validate_state = new_validate_state; + nft_net->validate_state = new_validate_state; } static void nf_tables_trans_destroy_work(struct work_struct *w); static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); +static void nft_trans_gc_work(struct work_struct *work); +static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); + static void nft_ctx_init(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, @@ -113,6 +125,8 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, if (trans == NULL) return NULL; + INIT_LIST_HEAD(&trans->list); + INIT_LIST_HEAD(&trans->binding_list); trans->msg_type = msg_type; trans->ctx = *ctx; @@ -125,34 +139,67 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } -static void nft_trans_destroy(struct nft_trans *trans) +static void nft_trans_list_del(struct nft_trans *trans) { list_del(&trans->list); + list_del(&trans->binding_list); +} + +static void nft_trans_destroy(struct nft_trans *trans) +{ + nft_trans_list_del(trans); kfree(trans); } -static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set, + bool bind) { + struct nftables_pernet *nft_net; struct net *net = ctx->net; struct nft_trans *trans; if (!nft_set_is_anonymous(set)) return; - list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_reverse(trans, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWSET: if (nft_trans_set(trans) == set) - nft_trans_set_bound(trans) = true; + nft_trans_set_bound(trans) = bind; break; case NFT_MSG_NEWSETELEM: if (nft_trans_elem_set(trans) == set) - nft_trans_elem_set_bound(trans) = true; + nft_trans_elem_set_bound(trans) = bind; break; } } } +static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, true); +} + +static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, false); +} + +static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) +{ + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (nft_set_is_anonymous(nft_trans_set(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; + } + + list_add_tail(&trans->list, &nft_net->commit_list); +} + static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) @@ -173,9 +220,10 @@ static int nf_tables_register_hook(struct net *net, return nf_register_net_hook(net, ops); } -static void nf_tables_unregister_hook(struct net *net, - const struct nft_table *table, - struct nft_chain *chain) +static void __nf_tables_unregister_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain, + bool release_netdev) { const struct nft_base_chain *basechain; const struct nf_hook_ops *ops; @@ -190,6 +238,16 @@ static void nf_tables_unregister_hook(struct net *net, return basechain->type->ops_unregister(net, ops); nf_unregister_net_hook(net, ops); + if (release_netdev && + table->family == NFPROTO_NETDEV) + nft_base_chain(chain)->ops.dev = NULL; +} + +static void nf_tables_unregister_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain) +{ + __nf_tables_unregister_hook(net, table, chain, false); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -203,7 +261,7 @@ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) if (msg_type == NFT_MSG_NEWTABLE) nft_activate_next(ctx->net, ctx->table); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -230,7 +288,7 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) if (msg_type == NFT_MSG_NEWCHAIN) nft_activate_next(ctx->net, ctx->chain); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return trans; } @@ -242,7 +300,7 @@ static int nft_delchain(struct nft_ctx *ctx) if (IS_ERR(trans)) return PTR_ERR(trans); - ctx->table->use--; + nft_use_dec(&ctx->table->use); nft_deactivate_next(ctx->net, ctx->chain); return 0; @@ -283,7 +341,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) /* You cannot delete the same rule twice */ if (nft_is_active_next(ctx->net, rule)) { nft_deactivate_next(ctx->net, rule); - ctx->chain->use--; + nft_use_dec(&ctx->chain->use); return 0; } return -ENOENT; @@ -303,7 +361,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); } nft_trans_rule(trans) = rule; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return trans; } @@ -358,11 +416,32 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, nft_activate_next(ctx->net, set); } nft_trans_set(trans) = set; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); + + return 0; +} + +static int nft_mapelem_deactivate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_deactivate(ctx->net, set, elem); return 0; } +static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_deactivate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); +} + static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) { int err; @@ -371,8 +450,11 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) if (err < 0) return err; + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + nft_deactivate_next(ctx->net, set); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -390,7 +472,7 @@ static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type, nft_activate_next(ctx->net, obj); nft_trans_obj(trans) = obj; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -404,7 +486,7 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; nft_deactivate_next(ctx->net, obj); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -423,7 +505,7 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, nft_activate_next(ctx->net, flowtable); nft_trans_flowtable(trans) = flowtable; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -438,7 +520,7 @@ static int nft_delflowtable(struct nft_ctx *ctx, return err; nft_deactivate_next(ctx->net, flowtable); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -451,13 +533,15 @@ static struct nft_table *nft_table_lookup(const struct net *net, const struct nlattr *nla, u8 family, u8 genmask) { + struct nftables_pernet *nft_net; struct nft_table *table; if (nla == NULL) return ERR_PTR(-EINVAL); - list_for_each_entry_rcu(table, &net->nft.tables, list, - lockdep_is_held(&net->nft.commit_mutex)) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_rcu(table, &nft_net->tables, list, + lockdep_is_held(&nft_net->commit_mutex)) { if (!nla_strcmp(nla, table->name) && table->family == family && nft_active_genmask(table, genmask)) @@ -471,9 +555,11 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net, const struct nlattr *nla, u8 genmask) { + struct nftables_pernet *nft_net; struct nft_table *table; - list_for_each_entry(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry(table, &nft_net->tables, list) { if (be64_to_cpu(nla_get_be64(nla)) == table->handle && nft_active_genmask(table, genmask)) return table; @@ -525,6 +611,7 @@ struct nft_module_request { static int nft_request_module(struct net *net, const char *fmt, ...) { char module_name[MODULE_NAME_LEN]; + struct nftables_pernet *nft_net; struct nft_module_request *req; va_list args; int ret; @@ -535,7 +622,8 @@ static int nft_request_module(struct net *net, const char *fmt, ...) if (ret >= MODULE_NAME_LEN) return 0; - list_for_each_entry(req, &net->nft.module_list, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry(req, &nft_net->module_list, list) { if (!strcmp(req->module, module_name)) { if (req->done) return 0; @@ -551,7 +639,7 @@ static int nft_request_module(struct net *net, const char *fmt, ...) req->done = false; strlcpy(req->module, module_name, MODULE_NAME_LEN); - list_add_tail(&req->list, &net->nft.module_list); + list_add_tail(&req->list, &nft_net->module_list); return -EAGAIN; } @@ -587,6 +675,13 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla, return ERR_PTR(-ENOENT); } +static __be16 nft_base_seq(const struct net *net) +{ + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + return htons(nft_net->base_seq & 0xffff); +} + static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, @@ -599,20 +694,16 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, int family, const struct nft_table *table) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || - nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || + nla_put_be32(skb, NFTA_TABLE_FLAGS, + htonl(table->flags & NFT_TABLE_F_MASK)) || nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) || nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle), NFTA_TABLE_PAD)) @@ -657,15 +748,17 @@ static int nf_tables_dump_tables(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nftables_pernet *nft_net; const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -769,7 +862,7 @@ static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt) if (cnt && i++ == cnt) break; - nf_unregister_net_hook(net, &nft_base_chain(chain)->ops); + nf_tables_unregister_hook(net, table, chain); } } @@ -784,7 +877,7 @@ static int nf_tables_table_enable(struct net *net, struct nft_table *table) if (!nft_is_base_chain(chain)) continue; - err = nf_register_net_hook(net, &nft_base_chain(chain)->ops); + err = nf_tables_register_hook(net, table, chain); if (err < 0) goto err; @@ -799,14 +892,40 @@ err: static void nf_tables_table_disable(struct net *net, struct nft_table *table) { + table->flags &= ~NFT_TABLE_F_DORMANT; nft_table_disable(net, table, 0); + table->flags |= NFT_TABLE_F_DORMANT; +} + +#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1) +#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0) +#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1) +#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ + __NFT_TABLE_F_WAS_AWAKEN) + +static bool nft_table_pending_update(const struct nft_ctx *ctx) +{ + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); + struct nft_trans *trans; + + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return true; + + list_for_each_entry(trans, &nft_net->commit_list, list) { + if (trans->ctx.table == ctx->table && + trans->msg_type == NFT_MSG_DELCHAIN && + nft_is_base_chain(trans->ctx.chain)) + return true; + } + + return false; } static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; u32 flags; - int ret = 0; + int ret; if (!ctx->nla[NFTA_TABLE_FLAGS]) return 0; @@ -815,9 +934,13 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags & ~NFT_TABLE_F_DORMANT) return -EINVAL; - if (flags == ctx->table->flags) + if (flags == (ctx->table->flags & NFT_TABLE_F_MASK)) return 0; + /* No dormant off/on/off/on games in single transaction */ + if (nft_table_pending_update(ctx)) + return -EINVAL; + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) @@ -825,22 +948,28 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if ((flags & NFT_TABLE_F_DORMANT) && !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { - nft_trans_table_enable(trans) = false; + ctx->table->flags |= NFT_TABLE_F_DORMANT; + if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) + ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(ctx->net, ctx->table); - if (ret >= 0) { - ctx->table->flags &= ~NFT_TABLE_F_DORMANT; - nft_trans_table_enable(trans) = true; + ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) { + ret = nf_tables_table_enable(ctx->net, ctx->table); + if (ret < 0) + goto err_register_hooks; + + ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT; } } - if (ret < 0) - goto err; nft_trans_table_update(trans) = true; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); + return 0; -err: + +err_register_hooks: + ctx->table->flags |= NFT_TABLE_F_DORMANT; nft_trans_destroy(trans); return ret; } @@ -896,11 +1025,36 @@ static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg, return strcmp(obj->key.name, k->name); } +static bool nft_supported_family(u8 family) +{ + return false +#ifdef CONFIG_NF_TABLES_INET + || family == NFPROTO_INET +#endif +#ifdef CONFIG_NF_TABLES_IPV4 + || family == NFPROTO_IPV4 +#endif +#ifdef CONFIG_NF_TABLES_ARP + || family == NFPROTO_ARP +#endif +#ifdef CONFIG_NF_TABLES_NETDEV + || family == NFPROTO_NETDEV +#endif +#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) + || family == NFPROTO_BRIDGE +#endif +#ifdef CONFIG_NF_TABLES_IPV6 + || family == NFPROTO_IPV6 +#endif + ; +} + static int nf_tables_newtable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; @@ -910,7 +1064,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, struct nft_ctx ctx; int err; - lockdep_assert_held(&net->nft.commit_mutex); + if (!nft_supported_family(family)) + return -EOPNOTSUPP; + + lockdep_assert_held(&nft_net->commit_mutex); attr = nla[NFTA_TABLE_NAME]; table = nft_table_lookup(net, attr, family, genmask); if (IS_ERR(table)) { @@ -960,7 +1117,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (err < 0) goto err_trans; - list_add_tail_rcu(&table->list, &net->nft.tables); + list_add_tail_rcu(&table->list, &nft_net->tables); return 0; err_trans: rhltable_destroy(&table->chains_ht); @@ -995,8 +1152,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, set)) continue; - if (nft_set_is_anonymous(set) && - !list_empty(&set->bindings)) + if (nft_set_is_anonymous(set)) continue; err = nft_delset(ctx, set); @@ -1040,11 +1196,12 @@ out: static int nft_flush(struct nft_ctx *ctx, int family) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_table *table, *nt; const struct nlattr * const *nla = ctx->nla; int err = 0; - list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) { + list_for_each_entry_safe(table, nt, &nft_net->tables, list) { if (family != AF_UNSPEC && table->family != family) continue; @@ -1158,7 +1315,9 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask) static bool lockdep_commit_lock_is_held(const struct net *net) { #ifdef CONFIG_PROVE_LOCKING - return lockdep_is_held(&net->nft.commit_mutex); + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + return lockdep_is_held(&nft_net->commit_mutex); #else return true; #endif @@ -1262,18 +1421,13 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, const struct nft_chain *chain) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name)) goto nla_put_failure; if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle), @@ -1366,11 +1520,13 @@ static int nf_tables_dump_chains(struct sk_buff *skb, unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -1556,12 +1712,13 @@ static int nft_chain_parse_hook(struct net *net, struct nft_chain_hook *hook, u8 family, bool autoload) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nlattr *ha[NFTA_HOOK_MAX + 1]; const struct nft_chain_type *type; struct net_device *dev; int err; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); err = nla_parse_nested_deprecated(ha, NFTA_HOOK_MAX, @@ -1662,13 +1819,13 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_rule **rules; int err; - if (table->use == UINT_MAX) - return -EOVERFLOW; - if (nla[NFTA_CHAIN_HOOK]) { struct nft_chain_hook hook; struct nf_hook_ops *ops; + if (table->flags & __NFT_TABLE_F_UPDATE) + return -EINVAL; + err = nft_chain_parse_hook(net, nla, &hook, family, true); if (err < 0) return err; @@ -1741,6 +1898,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (err < 0) goto err1; + if (!nft_use_inc(&table->use)) { + err = -EMFILE; + goto err_use; + } + err = rhltable_insert_key(&table->chains_ht, chain->name, &chain->rhlhead, nft_chain_ht_params); if (err) @@ -1758,11 +1920,12 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (nft_is_base_chain(chain)) nft_trans_chain_policy(trans) = policy; - table->use++; list_add_tail_rcu(&chain->list, &table->chains); return 0; err2: + nft_use_dec_restore(&table->use); +err_use: nf_tables_unregister_hook(net, table, chain); err1: nf_tables_chain_destroy(ctx); @@ -1846,6 +2009,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, if (nla[NFTA_CHAIN_HANDLE] && nla[NFTA_CHAIN_NAME]) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_trans *tmp; char *name; @@ -1855,7 +2019,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, goto err; err = -EEXIST; - list_for_each_entry(tmp, &ctx->net->nft.commit_list, list) { + list_for_each_entry(tmp, &nft_net->commit_list, list) { if (tmp->msg_type == NFT_MSG_NEWCHAIN && tmp->ctx.table == table && nft_trans_chain_update(tmp) && @@ -1868,7 +2032,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, nft_trans_chain_name(trans) = name; } - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err: @@ -1882,6 +2046,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; @@ -1893,7 +2058,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, u64 handle = 0; u32 flags = 0; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask); if (IS_ERR(table)) { @@ -2074,7 +2239,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family, { const struct nft_expr_type *type, *candidate = NULL; - list_for_each_entry(type, &nf_tables_expressions, list) { + list_for_each_entry_rcu(type, &nf_tables_expressions, list) { if (!nla_strcmp(nla, type->name)) { if (!type->family && !candidate) candidate = type; @@ -2106,9 +2271,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net, if (nla == NULL) return ERR_PTR(-EINVAL); + rcu_read_lock(); type = __nft_expr_type_get(family, nla); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES @@ -2267,27 +2436,31 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, err = nf_tables_expr_parse(ctx, nla, &info); if (err < 0) - goto err1; + goto err_expr_parse; + + err = -EOPNOTSUPP; + if (!(info.ops->type->flags & NFT_EXPR_STATEFUL)) + goto err_expr_stateful; err = -ENOMEM; expr = kzalloc(info.ops->size, GFP_KERNEL); if (expr == NULL) - goto err2; + goto err_expr_stateful; err = nf_tables_newexpr(ctx, &info, expr); if (err < 0) - goto err3; + goto err_expr_new; return expr; -err3: +err_expr_new: kfree(expr); -err2: +err_expr_stateful: owner = info.ops->type->owner; if (info.ops->type->release_ops) info.ops->type->release_ops(info.ops); module_put(owner); -err1: +err_expr_parse: return ERR_PTR(err); } @@ -2348,20 +2521,15 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, const struct nft_rule *prule) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; const struct nft_expr *expr, *next; struct nlattr *list; u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0, + nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_RULE_TABLE, table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name)) @@ -2482,11 +2650,13 @@ static int nf_tables_dump_rules(struct sk_buff *skb, unsigned int idx = 0; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -2703,12 +2873,15 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) err = nft_chain_validate(&ctx, chain); if (err < 0) return err; + + cond_resched(); } return 0; } static struct nft_rule *nft_rule_lookup_byid(const struct net *net, + const struct nft_chain *chain, const struct nlattr *nla); #define NFT_RULE_MAXEXPRS 128 @@ -2718,6 +2891,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); struct nft_expr_info *info = NULL; @@ -2735,7 +2909,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, int err, rem; u64 handle, pos_handle; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask); if (IS_ERR(table)) { @@ -2771,9 +2945,6 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return -EINVAL; handle = nf_tables_alloc_handle(table); - if (chain->use == UINT_MAX) - return -EOVERFLOW; - if (nla[NFTA_RULE_POSITION]) { pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); old_rule = __nft_rule_lookup(chain, pos_handle); @@ -2782,7 +2953,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return PTR_ERR(old_rule); } } else if (nla[NFTA_RULE_POSITION_ID]) { - old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]); + old_rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_POSITION_ID]); if (IS_ERR(old_rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]); return PTR_ERR(old_rule); @@ -2855,16 +3026,21 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, expr = nft_expr_next(expr); } + if (!nft_use_inc(&chain->use)) { + err = -EMFILE; + goto err2; + } + if (nlh->nlmsg_flags & NLM_F_REPLACE) { trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (trans == NULL) { err = -ENOMEM; - goto err2; + goto err_destroy_flow_rule; } err = nft_delrule(&ctx, old_rule); if (err < 0) { nft_trans_destroy(trans); - goto err2; + goto err_destroy_flow_rule; } list_add_tail_rcu(&rule->list, &old_rule->list); @@ -2872,7 +3048,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (!trans) { err = -ENOMEM; - goto err2; + goto err_destroy_flow_rule; } if (nlh->nlmsg_flags & NLM_F_APPEND) { @@ -2888,9 +3064,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } } kvfree(info); - chain->use++; - if (net->nft.validate_state == NFT_VALIDATE_DO) + if (nft_net->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, table); if (chain->flags & NFT_CHAIN_HW_OFFLOAD) { @@ -2902,8 +3077,12 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } return 0; + +err_destroy_flow_rule: + nft_use_dec_restore(&chain->use); err2: - nf_tables_rule_release(&ctx, rule); + nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR); + nf_tables_rule_destroy(&ctx, rule); err1: for (i = 0; i < n; i++) { if (info[i].ops) { @@ -2917,15 +3096,18 @@ err1: } static struct nft_rule *nft_rule_lookup_byid(const struct net *net, + const struct nft_chain *chain, const struct nlattr *nla) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u32 id = ntohl(nla_get_be32(nla)); struct nft_trans *trans; - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { struct nft_rule *rule = nft_trans_rule(trans); if (trans->msg_type == NFT_MSG_NEWRULE && + trans->ctx.chain == chain && id == nft_trans_rule_id(trans)) return rule; } @@ -2972,7 +3154,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, err = nft_delrule(&ctx, rule); } else if (nla[NFTA_RULE_ID]) { - rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]); + rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_ID]); if (IS_ERR(rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]); return PTR_ERR(rule); @@ -3040,12 +3222,13 @@ nft_select_set_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc, enum nft_set_policies policy) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); const struct nft_set_ops *ops, *bops; struct nft_set_estimate est, best; const struct nft_set_type *type; u32 flags = 0; - lockdep_assert_held(&ctx->net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES if (list_empty(&nf_tables_set_types)) { @@ -3187,16 +3370,19 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table, } static struct nft_set *nft_set_lookup_byid(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u8 genmask) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans; u32 id = ntohl(nla_get_be32(nla)); - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWSET) { struct nft_set *set = nft_trans_set(trans); if (id == nft_trans_set_id(trans) && + set->table == table && nft_active_genmask(set, genmask)) return set; } @@ -3217,7 +3403,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net, if (!nla_set_id) return set; - set = nft_set_lookup_byid(net, nla_set_id, genmask); + set = nft_set_lookup_byid(net, table, nla_set_id, genmask); } return set; } @@ -3236,6 +3422,9 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, if (p[1] != 'd' || strchr(p + 2, '%')) return -EINVAL; + if (strnlen(name, NFT_SET_MAX_ANONLEN) >= NFT_SET_MAX_ANONLEN) + return -EINVAL; + inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL); if (inuse == NULL) return -ENOMEM; @@ -3243,7 +3432,7 @@ cont: list_for_each_entry(i, &ctx->table->sets, list) { int tmp; - if (!nft_is_active_next(ctx->net, set)) + if (!nft_is_active_next(ctx->net, i)) continue; if (!sscanf(i->name, name, &tmp)) continue; @@ -3299,23 +3488,17 @@ __be64 nf_jiffies64_to_msecs(u64 input) static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, const struct nft_set *set, u16 event, u16 flags) { - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct nlattr *desc; u32 portid = ctx->portid; u32 seq = ctx->seq; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), - flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family, + NFNETLINK_V0, nft_base_seq(ctx->net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx->family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_NAME, set->name)) @@ -3411,14 +3594,16 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; struct net *net = sock_net(skb->sk); struct nft_ctx *ctx = cb->data, ctx_set; + struct nftables_pernet *nft_net; if (cb->args[1]) return skb->len; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (ctx->family != NFPROTO_UNSPEC && ctx->family != table->family) continue; @@ -3609,6 +3794,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == (NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; + if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == + (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; + if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) == + (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; } dtype = 0; @@ -3650,6 +3841,9 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout); if (err) return err; @@ -3658,6 +3852,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } @@ -3711,10 +3909,15 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (ops->privsize != NULL) size = ops->privsize(nla, &desc); + if (!nft_use_inc(&table->use)) { + err = -EMFILE; + goto err1; + } + set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); if (!set) { err = -ENOMEM; - goto err1; + goto err_alloc; } name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL); @@ -3735,6 +3938,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, } INIT_LIST_HEAD(&set->bindings); + refcount_set(&set->refs, 1); set->table = table; write_pnet(&set->net, net); set->ops = ops; @@ -3761,29 +3965,38 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, goto err4; list_add_tail_rcu(&set->list, &table->sets); - table->use++; + return 0; err4: - ops->destroy(set); + ops->destroy(&ctx, set); err3: kfree(set->name); err2: kvfree(set); +err_alloc: + nft_use_dec_restore(&table->use); err1: module_put(to_set_type(ops)->owner); return err; } -static void nft_set_destroy(struct nft_set *set) +static void nft_set_put(struct nft_set *set) +{ + if (refcount_dec_and_test(&set->refs)) { + kfree(set->name); + kvfree(set); + } +} + +static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) { if (WARN_ON(set->use > 0)) return; - set->ops->destroy(set); + set->ops->destroy(ctx, set); module_put(to_set_type(set->ops)->owner); - kfree(set->name); - kvfree(set); + nft_set_put(set); } static int nf_tables_delset(struct net *net, struct sock *nlsk, @@ -3829,6 +4042,12 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, return nft_delset(&ctx, set); } +static int nft_validate_register_store(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type, + unsigned int len); + static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, @@ -3850,9 +4069,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *i; struct nft_set_iter iter; - if (set->use == UINT_MAX) - return -EOVERFLOW; - if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; @@ -3877,10 +4093,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, return iter.err; } bind: + if (!nft_use_inc(&set->use)) + return -EMFILE; + binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); nft_set_trans_bind(ctx, set); - set->use++; return 0; } @@ -3893,23 +4111,81 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL); } } +static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +static int nft_mapelem_activate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_activate(ctx->net, set, elem); + + return 0; +} + +static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_activate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); +} + +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) +{ + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(ctx, set); + + nft_clear(ctx->net, set); + } + + nft_use_inc_restore(&set->use); +} +EXPORT_SYMBOL_GPL(nf_tables_activate_set); + void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase) { switch (phase) { + case NFT_TRANS_PREPARE_ERROR: + nft_set_trans_unbind(ctx, set); + if (nft_set_is_anonymous(set)) + nft_deactivate_next(ctx->net, set); + else + list_del_rcu(&binding->list); + + nft_use_dec(&set->use); + break; case NFT_TRANS_PREPARE: - set->use--; + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + + nft_deactivate_next(ctx->net, set); + } + nft_use_dec(&set->use); return; case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: - set->use--; + if (nft_set_is_anonymous(set) && + set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + + nft_use_dec(&set->use); /* fall through */ default: nf_tables_unbind_set(ctx, set, binding, @@ -3921,7 +4197,7 @@ EXPORT_SYMBOL_GPL(nf_tables_deactivate_set); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) - nft_set_destroy(set); + nft_set_destroy(ctx, set); } EXPORT_SYMBOL_GPL(nf_tables_destroy_set); @@ -4091,8 +4367,12 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); struct nft_set_dump_args *args; + if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext)) + return 0; + args = container_of(iter, struct nft_set_dump_args, iter); return nf_tables_fill_setelem(args->skb, set, elem); } @@ -4106,18 +4386,19 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { struct nft_set_dump_ctx *dump_ctx = cb->data; struct net *net = sock_net(skb->sk); + struct nftables_pernet *nft_net; struct nft_table *table; struct nft_set *set; struct nft_set_dump_args args; bool set_found = false; - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct nlattr *nest; u32 portid, seq; int event; rcu_read_lock(); - list_for_each_entry_rcu(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (dump_ctx->ctx.family != NFPROTO_UNSPEC && dump_ctx->ctx.family != table->family) continue; @@ -4143,16 +4424,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), - NLM_F_MULTI); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI, + table->family, NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = table->family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name)) @@ -4209,22 +4485,16 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct nlattr *nest; int err; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), - flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family, + NFNETLINK_V0, nft_base_seq(ctx->net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx->family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_NAME, set->name)) @@ -4264,11 +4534,54 @@ static int nft_setelem_parse_flags(const struct nft_set *set, return 0; } +static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set, + struct nft_data *key, struct nlattr *attr) +{ + struct nft_data_desc desc; + int err; + + err = nft_data_init(ctx, key, NFT_DATA_VALUE_MAXLEN, &desc, attr); + if (err < 0) + return err; + + if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) { + nft_data_release(key, desc.type); + return -EINVAL; + } + + return 0; +} + +static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, + struct nft_data_desc *desc, + struct nft_data *data, + struct nlattr *attr) +{ + u32 dtype; + int err; + + err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr); + if (err < 0) + return err; + + if (set->dtype == NFT_DATA_VERDICT) + dtype = NFT_DATA_VERDICT; + else + dtype = NFT_DATA_VALUE; + + if (dtype != desc->type || + set->dlen != desc->len) { + nft_data_release(data, desc->type); + return -EINVAL; + } + + return 0; +} + static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; - struct nft_data_desc desc; struct nft_set_elem elem; struct sk_buff *skb; uint32_t flags = 0; @@ -4287,17 +4600,11 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) return err; - err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc, - nla[NFTA_SET_ELEM_KEY]); + err = nft_setelem_parse_key(ctx, set, &elem.key.val, + nla[NFTA_SET_ELEM_KEY]); if (err < 0) return err; - err = -EINVAL; - if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) { - nft_data_release(&elem.key.val, desc.type); - return err; - } - priv = set->ops->get(ctx->net, set, &elem, flags); if (IS_ERR(priv)) return PTR_ERR(priv); @@ -4444,6 +4751,7 @@ void *nft_set_elem_init(const struct nft_set *set, return elem; } +/* Drop references and destroy. Called from gc, dynset and abort path. */ void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr) { @@ -4467,16 +4775,16 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, } } if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use--; + nft_use_dec(&(*nft_set_ext_obj(ext))->use); kfree(elem); } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); -/* Only called from commit path, nft_set_elem_deactivate() already deals with - * the refcounting from the preparation phase. +/* Destroy element. References have been already dropped in the preparation + * path via nft_setelem_data_deactivate(). */ -static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, - const struct nft_set *set, void *elem) +void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); @@ -4484,20 +4792,20 @@ static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, nf_tables_expr_destroy(ctx, nft_set_ext_expr(ext)); kfree(elem); } +EXPORT_SYMBOL_GPL(nf_tables_set_elem_destroy); static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr, u32 nlmsg_flags) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; u8 genmask = nft_genmask_next(ctx->net); - struct nft_data_desc d1, d2; struct nft_set_ext_tmpl tmpl; struct nft_set_ext *ext, *ext2; struct nft_set_elem elem; struct nft_set_binding *binding; struct nft_object *obj = NULL; struct nft_userdata *udata; - struct nft_data data; + struct nft_data_desc desc; enum nft_registers dreg; struct nft_trans *trans; u32 flags = 0; @@ -4531,6 +4839,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return -EINVAL; } + if (set->flags & NFT_SET_OBJECT) { + if (!nla[NFTA_SET_ELEM_OBJREF] && + !(flags & NFT_SET_ELEM_INTERVAL_END)) + return -EINVAL; + } else { + if (nla[NFTA_SET_ELEM_OBJREF]) + return -EINVAL; + } + if ((flags & NFT_SET_ELEM_INTERVAL_END) && (nla[NFTA_SET_ELEM_DATA] || nla[NFTA_SET_ELEM_OBJREF] || @@ -4562,15 +4879,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return err; } - err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &d1, - nla[NFTA_SET_ELEM_KEY]); + err = nft_setelem_parse_key(ctx, set, &elem.key.val, + nla[NFTA_SET_ELEM_KEY]); if (err < 0) goto err1; - err = -EINVAL; - if (d1.type != NFT_DATA_VALUE || d1.len != set->klen) - goto err2; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, d1.len); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); if (timeout > 0) { nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); if (timeout != set->timeout) @@ -4578,30 +4892,30 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { - if (!(set->flags & NFT_SET_OBJECT)) { - err = -EINVAL; - goto err2; - } obj = nft_obj_lookup(ctx->net, ctx->table, nla[NFTA_SET_ELEM_OBJREF], set->objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); + obj = NULL; + goto err2; + } + + if (!nft_use_inc(&obj->use)) { + err = -EMFILE; + obj = NULL; goto err2; } + nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); } if (nla[NFTA_SET_ELEM_DATA] != NULL) { - err = nft_data_init(ctx, &data, sizeof(data), &d2, - nla[NFTA_SET_ELEM_DATA]); + err = nft_setelem_parse_data(ctx, set, &desc, &elem.data.val, + nla[NFTA_SET_ELEM_DATA]); if (err < 0) goto err2; - err = -EINVAL; - if (set->dtype != NFT_DATA_VERDICT && d2.len != set->dlen) - goto err3; - dreg = nft_type_to_reg(set->dtype); list_for_each_entry(binding, &set->bindings, list) { struct nft_ctx bind_ctx = { @@ -4615,19 +4929,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, continue; err = nft_validate_register_store(&bind_ctx, dreg, - &data, - d2.type, d2.len); + &elem.data.val, + desc.type, desc.len); if (err < 0) goto err3; - if (d2.type == NFT_DATA_VERDICT && - (data.verdict.code == NFT_GOTO || - data.verdict.code == NFT_JUMP)) + if (desc.type == NFT_DATA_VERDICT && + (elem.data.val.verdict.code == NFT_GOTO || + elem.data.val.verdict.code == NFT_JUMP)) nft_validate_state_update(ctx->net, NFT_VALIDATE_NEED); } - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); } /* The full maximum length of userdata can exceed the maximum @@ -4643,8 +4957,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } err = -ENOMEM; - elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data, - timeout, expiration, GFP_KERNEL); + elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, + elem.data.val.data, timeout, expiration, + GFP_KERNEL); if (elem.priv == NULL) goto err3; @@ -4656,16 +4971,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, udata->len = ulen - 1; nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } - if (obj) { + if (obj) *nft_set_ext_obj(ext) = obj; - obj->use++; - } trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) goto err4; - ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; + ext->genmask = nft_genmask_cur(ctx->net); + err = set->ops->insert(ctx->net, set, &elem, &ext2); if (err) { if (err == -EEXIST) { @@ -4697,7 +5011,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } nft_trans_elem(trans) = elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err6: @@ -4705,14 +5019,15 @@ err6: err5: kfree(trans); err4: - if (obj) - obj->use--; kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) - nft_data_release(&data, d2.type); + nft_data_release(&elem.data.val, desc.type); err2: - nft_data_release(&elem.key.val, d1.type); + if (obj) + nft_use_dec_restore(&obj->use); + + nft_data_release(&elem.key.val, NFT_DATA_VALUE); err1: return err; } @@ -4722,6 +5037,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u8 genmask = nft_genmask_next(net); const struct nlattr *attr; struct nft_set *set; @@ -4741,7 +5057,8 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { @@ -4750,7 +5067,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, return err; } - if (net->nft.validate_state == NFT_VALIDATE_DO) + if (nft_net->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, ctx.table); return 0; @@ -4769,46 +5086,49 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, */ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { + struct nft_chain *chain; + if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - data->verdict.chain->use++; + chain = data->verdict.chain; + nft_use_inc_restore(&chain->use); break; } } } -static void nft_set_elem_activate(const struct net *net, - const struct nft_set *set, - struct nft_set_elem *elem) +static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_hold(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use++; + nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); } -static void nft_set_elem_deactivate(const struct net *net, - const struct nft_set *set, - struct nft_set_elem *elem) +void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_release(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use--; + nft_use_dec(&(*nft_set_ext_obj(ext))->use); } +EXPORT_SYMBOL_GPL(nft_setelem_data_deactivate); static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_set_ext_tmpl tmpl; - struct nft_data_desc desc; struct nft_set_elem elem; struct nft_set_ext *ext; struct nft_trans *trans; @@ -4819,11 +5139,10 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr, nft_set_elem_policy, NULL); if (err < 0) - goto err1; + return err; - err = -EINVAL; if (nla[NFTA_SET_ELEM_KEY] == NULL) - goto err1; + return -EINVAL; nft_set_ext_prepare(&tmpl); @@ -4833,54 +5152,47 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (flags != 0) nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); - err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc, - nla[NFTA_SET_ELEM_KEY]); + err = nft_setelem_parse_key(ctx, set, &elem.key.val, + nla[NFTA_SET_ELEM_KEY]); if (err < 0) - goto err1; - - err = -EINVAL; - if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) - goto err2; + return err; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, desc.len); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); err = -ENOMEM; elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, NULL, 0, 0, GFP_KERNEL); if (elem.priv == NULL) - goto err2; + goto fail_elem; ext = nft_set_elem_ext(set, elem.priv); if (flags) *nft_set_ext_flags(ext) = flags; trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); - if (trans == NULL) { - err = -ENOMEM; - goto err3; - } + if (trans == NULL) + goto fail_trans; priv = set->ops->deactivate(ctx->net, set, &elem); if (priv == NULL) { err = -ENOENT; - goto err4; + goto fail_ops; } kfree(elem.priv); elem.priv = priv; - nft_set_elem_deactivate(ctx->net, set, &elem); + nft_setelem_data_deactivate(ctx->net, set, &elem); nft_trans_elem(trans) = elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; -err4: +fail_ops: kfree(trans); -err3: +fail_trans: kfree(elem.priv); -err2: - nft_data_release(&elem.key.val, desc.type); -err1: +fail_elem: + nft_data_release(&elem.key.val, NFT_DATA_VALUE); return err; } @@ -4903,10 +5215,10 @@ static int nft_flush_set(const struct nft_ctx *ctx, } set->ndeact++; - nft_set_elem_deactivate(ctx->net, set, elem); + nft_setelem_data_deactivate(ctx->net, set, elem); nft_trans_elem_set(trans) = set; nft_trans_elem(trans) = *elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err1: @@ -4933,7 +5245,11 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + + if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT)) return -EBUSY; if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) { @@ -4956,31 +5272,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, return err; } -void nft_set_gc_batch_release(struct rcu_head *rcu) -{ - struct nft_set_gc_batch *gcb; - unsigned int i; - - gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); - for (i = 0; i < gcb->head.cnt; i++) - nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true); - kfree(gcb); -} -EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); - -struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, - gfp_t gfp) -{ - struct nft_set_gc_batch *gcb; - - gcb = kzalloc(sizeof(*gcb), gfp); - if (gcb == NULL) - return gcb; - gcb->head.set = set; - return gcb; -} -EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc); - /* * Stateful objects */ @@ -5203,7 +5494,7 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, nft_trans_obj(trans) = obj; nft_trans_obj_update(trans) = true; nft_trans_obj_newobj(trans) = newobj; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -5264,9 +5555,14 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + if (!nft_use_inc(&table->use)) + return -EMFILE; + type = nft_obj_type_get(net, objtype); - if (IS_ERR(type)) - return PTR_ERR(type); + if (IS_ERR(type)) { + err = PTR_ERR(type); + goto err_type; + } obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]); if (IS_ERR(obj)) { @@ -5292,7 +5588,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, goto err4; list_add_tail_rcu(&obj->list, &table->objects); - table->use++; + return 0; err4: /* queued in transaction log */ @@ -5306,6 +5602,9 @@ err2: kfree(obj); err1: module_put(type->owner); +err_type: + nft_use_dec_restore(&table->use); + return err; } @@ -5314,19 +5613,14 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, int family, const struct nft_table *table, struct nft_object *obj, bool reset) { - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) || nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || @@ -5357,6 +5651,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) struct nft_obj_filter *filter = cb->data; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; struct nft_object *obj; bool reset = false; @@ -5364,9 +5659,10 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) reset = true; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -5649,10 +5945,11 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, enum nft_trans_phase phase) { switch (phase) { + case NFT_TRANS_PREPARE_ERROR: case NFT_TRANS_PREPARE: case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: - flowtable->use--; + nft_use_dec(&flowtable->use); /* fall through */ default: return; @@ -5769,11 +6066,12 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx, return err; } +/* call under rcu_read_lock */ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family) { const struct nf_flowtable_type *type; - list_for_each_entry(type, &nf_tables_flowtables, list) { + list_for_each_entry_rcu(type, &nf_tables_flowtables, list) { if (family == type->family) return type; } @@ -5785,9 +6083,13 @@ nft_flowtable_type_get(struct net *net, u8 family) { const struct nf_flowtable_type *type; + rcu_read_lock(); type = __nft_flowtable_type_get(family); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES @@ -5799,8 +6101,9 @@ nft_flowtable_type_get(struct net *net, u8 family) return ERR_PTR(-ENOENT); } -static void nft_unregister_flowtable_net_hooks(struct net *net, - struct nft_flowtable *flowtable) +static void __nft_unregister_flowtable_net_hooks(struct net *net, + struct nft_flowtable *flowtable, + bool release_netdev) { int i; @@ -5809,9 +6112,17 @@ static void nft_unregister_flowtable_net_hooks(struct net *net, continue; nf_unregister_net_hook(net, &flowtable->ops[i]); + if (release_netdev) + flowtable->ops[i].dev = NULL; } } +static void nft_unregister_flowtable_net_hooks(struct net *net, + struct nft_flowtable *flowtable) +{ + __nft_unregister_flowtable_net_hooks(net, flowtable, false); +} + static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -5858,9 +6169,14 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + if (!nft_use_inc(&table->use)) + return -EMFILE; + flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL); - if (!flowtable) - return -ENOMEM; + if (!flowtable) { + err = -ENOMEM; + goto flowtable_alloc; + } flowtable->table = table; flowtable->handle = nf_tables_alloc_handle(table); @@ -5892,6 +6208,9 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, continue; list_for_each_entry(ft, &table->flowtables, list) { + if (!nft_is_active_next(net, ft)) + continue; + for (k = 0; k < ft->ops_len; k++) { if (!ft->ops[k].dev) continue; @@ -5914,7 +6233,6 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, goto err6; list_add_tail_rcu(&flowtable->list, &table->flowtables); - table->use++; return 0; err6: @@ -5932,6 +6250,9 @@ err2: kfree(flowtable->name); err1: kfree(flowtable); +flowtable_alloc: + nft_use_dec_restore(&table->use); + return err; } @@ -5989,20 +6310,15 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, struct nft_flowtable *flowtable) { struct nlattr *nest, *nest_devs; - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; int i; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) || nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) || nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) || @@ -6051,13 +6367,15 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; struct nft_flowtable *flowtable; const struct nft_table *table; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -6227,21 +6545,17 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; char buf[TASK_COMM_LEN]; int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - - if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) || + if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) || nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) || nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current))) goto nla_put_failure; @@ -6274,6 +6588,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nft_flowtable *flowtable; + struct nftables_pernet *nft_net; struct nft_table *table; struct net *net; @@ -6281,13 +6596,14 @@ static int nf_tables_flowtable_event(struct notifier_block *this, return 0; net = dev_net(dev); - mutex_lock(&net->nft.commit_mutex); - list_for_each_entry(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + list_for_each_entry(table, &nft_net->tables, list) { list_for_each_entry(flowtable, &table->flowtables, list) { nft_flowtable_event(event, dev, flowtable); } } - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } @@ -6468,19 +6784,22 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { static int nf_tables_validate(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_table *table; - switch (net->nft.validate_state) { + switch (nft_net->validate_state) { case NFT_VALIDATE_SKIP: break; case NFT_VALIDATE_NEED: nft_validate_state_update(net, NFT_VALIDATE_DO); /* fall through */ case NFT_VALIDATE_DO: - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_validate(net, table) < 0) return -EAGAIN; } + + nft_validate_state_update(net, NFT_VALIDATE_SKIP); break; } @@ -6569,7 +6888,7 @@ static void nft_commit_release(struct nft_trans *trans) nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELSET: - nft_set_destroy(nft_trans_set(trans)); + nft_set_destroy(&trans->ctx, nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: nf_tables_set_elem_destroy(&trans->ctx, @@ -6605,7 +6924,7 @@ static void nf_tables_trans_destroy_work(struct work_struct *w) synchronize_rcu(); list_for_each_entry_safe(trans, next, &head, list) { - list_del(&trans->list); + nft_trans_list_del(trans); nft_commit_release(trans); } } @@ -6649,9 +6968,10 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha static void nf_tables_commit_chain_prepare_cancel(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { struct nft_chain *chain = trans->ctx.chain; if (trans->msg_type == NFT_MSG_NEWRULE || @@ -6747,12 +7067,204 @@ static void nft_chain_del(struct nft_chain *chain) list_del_rcu(&chain->list); } +static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx, + struct nft_trans_gc *trans) +{ + void **priv = trans->priv; + unsigned int i; + + for (i = 0; i < trans->count; i++) { + struct nft_set_elem elem = { + .priv = priv[i], + }; + + nft_setelem_data_deactivate(ctx->net, trans->set, &elem); + trans->set->ops->remove(trans->net, trans->set, &elem); + } +} + +void nft_trans_gc_destroy(struct nft_trans_gc *trans) +{ + nft_set_put(trans->set); + put_net(trans->net); + kfree(trans); +} +EXPORT_SYMBOL_GPL(nft_trans_gc_destroy); + +static void nft_trans_gc_trans_free(struct rcu_head *rcu) +{ + struct nft_set_elem elem = {}; + struct nft_trans_gc *trans; + struct nft_ctx ctx = {}; + unsigned int i; + + trans = container_of(rcu, struct nft_trans_gc, rcu); + ctx.net = read_pnet(&trans->set->net); + + for (i = 0; i < trans->count; i++) { + elem.priv = trans->priv[i]; + atomic_dec(&trans->set->nelems); + + nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv); + } + + nft_trans_gc_destroy(trans); +} + +static bool nft_trans_gc_work_done(struct nft_trans_gc *trans) +{ + struct nftables_pernet *nft_net; + struct nft_ctx ctx = {}; + + nft_net = net_generic(trans->net, nf_tables_net_id); + + mutex_lock(&nft_net->commit_mutex); + + /* Check for race with transaction, otherwise this batch refers to + * stale objects that might not be there anymore. Skip transaction if + * set has been destroyed from control plane transaction in case gc + * worker loses race. + */ + if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) { + mutex_unlock(&nft_net->commit_mutex); + return false; + } + + ctx.net = trans->net; + ctx.table = trans->set->table; + + nft_trans_gc_setelem_remove(&ctx, trans); + mutex_unlock(&nft_net->commit_mutex); + + return true; +} + +static void nft_trans_gc_work(struct work_struct *work) +{ + struct nft_trans_gc *trans, *next; + LIST_HEAD(trans_gc_list); + + spin_lock(&nf_tables_gc_list_lock); + list_splice_init(&nf_tables_gc_list, &trans_gc_list); + spin_unlock(&nf_tables_gc_list_lock); + + list_for_each_entry_safe(trans, next, &trans_gc_list, list) { + list_del(&trans->list); + if (!nft_trans_gc_work_done(trans)) { + nft_trans_gc_destroy(trans); + continue; + } + call_rcu(&trans->rcu, nft_trans_gc_trans_free); + } +} + +struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, + unsigned int gc_seq, gfp_t gfp) +{ + struct net *net = read_pnet(&set->net); + struct nft_trans_gc *trans; + + trans = kzalloc(sizeof(*trans), gfp); + if (!trans) + return NULL; + + trans->net = maybe_get_net(net); + if (!trans->net) { + kfree(trans); + return NULL; + } + + refcount_inc(&set->refs); + trans->set = set; + trans->seq = gc_seq; + + return trans; +} +EXPORT_SYMBOL_GPL(nft_trans_gc_alloc); + +void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv) +{ + trans->priv[trans->count++] = priv; +} +EXPORT_SYMBOL_GPL(nft_trans_gc_elem_add); + +static void nft_trans_gc_queue_work(struct nft_trans_gc *trans) +{ + spin_lock(&nf_tables_gc_list_lock); + list_add_tail(&trans->list, &nf_tables_gc_list); + spin_unlock(&nf_tables_gc_list_lock); + + schedule_work(&trans_gc_work); +} + +static int nft_trans_gc_space(struct nft_trans_gc *trans) +{ + return NFT_TRANS_GC_BATCHCOUNT - trans->count; +} + +struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, + unsigned int gc_seq, gfp_t gfp) +{ + struct nft_set *set; + + if (nft_trans_gc_space(gc)) + return gc; + + set = gc->set; + nft_trans_gc_queue_work(gc); + + return nft_trans_gc_alloc(set, gc_seq, gfp); +} +EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async); + +void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) +{ + if (trans->count == 0) { + nft_trans_gc_destroy(trans); + return; + } + + nft_trans_gc_queue_work(trans); +} +EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async_done); + +struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) +{ + struct nft_set *set; + + if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net))) + return NULL; + + if (nft_trans_gc_space(gc)) + return gc; + + set = gc->set; + call_rcu(&gc->rcu, nft_trans_gc_trans_free); + + return nft_trans_gc_alloc(set, 0, gfp); +} +EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync); + +void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) +{ + WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net)); + + if (trans->count == 0) { + nft_trans_gc_destroy(trans); + return; + } + + call_rcu(&trans->rcu, nft_trans_gc_trans_free); +} +EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync_done); + static void nf_tables_module_autoload_cleanup(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_module_request *req, *next; - WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); - list_for_each_entry_safe(req, next, &net->nft.module_list, list) { + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + list_for_each_entry_safe(req, next, &nft_net->module_list, list) { WARN_ON_ONCE(!req->done); list_del(&req->list); kfree(req); @@ -6761,6 +7273,7 @@ static void nf_tables_module_autoload_cleanup(struct net *net) static void nf_tables_commit_release(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans; /* all side effects have to be made visible. @@ -6770,41 +7283,71 @@ static void nf_tables_commit_release(struct net *net) * Memory reclaim happens asynchronously from work queue * to prevent expensive synchronize_rcu() in commit phase. */ - if (list_empty(&net->nft.commit_list)) { + if (list_empty(&nft_net->commit_list)) { nf_tables_module_autoload_cleanup(net); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return; } - trans = list_last_entry(&net->nft.commit_list, + trans = list_last_entry(&nft_net->commit_list, struct nft_trans, list); get_net(trans->ctx.net); WARN_ON_ONCE(trans->put_net); trans->put_net = true; spin_lock(&nf_tables_destroy_list_lock); - list_splice_tail_init(&net->nft.commit_list, &nf_tables_destroy_list); + list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list); spin_unlock(&nf_tables_destroy_list_lock); nf_tables_module_autoload_cleanup(net); schedule_work(&trans_destroy_work); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); +} + +static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net) +{ + unsigned int gc_seq; + + /* Bump gc counter, it becomes odd, this is the busy mark. */ + gc_seq = READ_ONCE(nft_net->gc_seq); + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); + + return gc_seq; +} + +static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) +{ + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); } static int nf_tables_commit(struct net *net, struct sk_buff *skb) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; + unsigned int gc_seq; int err; - if (list_empty(&net->nft.commit_list)) { - mutex_unlock(&net->nft.commit_mutex); + if (list_empty(&nft_net->commit_list)) { + mutex_unlock(&nft_net->commit_mutex); return 0; } + list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (nft_set_is_anonymous(nft_trans_set(trans)) && + !nft_trans_set_bound(trans)) { + pr_warn_once("nftables ruleset with unbound set\n"); + return -EINVAL; + } + break; + } + } + /* 0. Validate ruleset, otherwise roll back for error reporting. */ if (nf_tables_validate(net) < 0) return -EAGAIN; @@ -6814,7 +7357,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return err; /* 1. Allocate space for next generation rules_gen_X[] */ - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { int ret; if (trans->msg_type == NFT_MSG_NEWRULE || @@ -6830,7 +7373,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } /* step 2. Make rules_gen_X visible to packet path */ - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { list_for_each_entry(chain, &table->chains, list) nf_tables_commit_chain(net, chain); } @@ -6839,20 +7382,26 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) * Bump generation counter, invalidate any dump in progress. * Cannot fail after this point. */ - while (++net->nft.base_seq == 0); + while (++nft_net->base_seq == 0) + ; + + gc_seq = nft_gc_seq_begin(nft_net); /* step 3. Start new generation, rules_gen_X now in use. */ net->nft.gencursor = nft_gencursor_next(net); - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!nft_trans_table_enable(trans)) { - nf_tables_table_disable(net, - trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + nft_trans_destroy(trans); + break; } + if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT) + nf_tables_table_disable(net, trans->ctx.table); + + trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; } else { nft_clear(net, trans->ctx.table); } @@ -6887,6 +7436,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); + if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_flow_rule_destroy(nft_trans_flow_rule(trans)); + nft_trans_destroy(trans); break; case NFT_MSG_DELRULE: @@ -6897,6 +7449,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), NFT_TRANS_COMMIT); + + if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: nft_clear(net, nft_trans_set(trans)); @@ -6905,13 +7460,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) */ if (nft_set_is_anonymous(nft_trans_set(trans)) && !list_empty(&nft_trans_set(trans)->bindings)) - trans->ctx.table->use--; + nft_use_dec(&trans->ctx.table->use); nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); nft_trans_destroy(trans); break; case NFT_MSG_DELSET: + nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_DELSET, GFP_KERNEL); @@ -6973,6 +7529,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); + + nft_gc_seq_end(nft_net, gc_seq); nf_tables_commit_release(net); return 0; @@ -6980,17 +7538,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) static void nf_tables_module_autoload(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_module_request *req, *next; LIST_HEAD(module_list); - list_splice_init(&net->nft.module_list, &module_list); - mutex_unlock(&net->nft.commit_mutex); + list_splice_init(&nft_net->module_list, &module_list); + mutex_unlock(&nft_net->commit_mutex); list_for_each_entry_safe(req, next, &module_list, list) { request_module("%s", req->module); req->done = true; } - mutex_lock(&net->nft.commit_mutex); - list_splice(&module_list, &net->nft.module_list); + mutex_lock(&nft_net->commit_mutex); + list_splice(&module_list, &nft_net->module_list); } static void nf_tables_abort_release(struct nft_trans *trans) @@ -7006,7 +7565,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - nft_set_destroy(nft_trans_set(trans)); + nft_set_destroy(&trans->ctx, nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), @@ -7024,23 +7583,31 @@ static void nf_tables_abort_release(struct nft_trans *trans) static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; struct nft_trans_elem *te; + int err = 0; if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) - return -EAGAIN; + err = -EAGAIN; - list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list, + list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_enable(trans)) { - nf_tables_table_disable(net, - trans->ctx.table); + if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + nft_trans_destroy(trans); + break; + } + if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) { + nf_tables_table_disable(net, trans->ctx.table); trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) { + trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT; } + trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; nft_trans_destroy(trans); } else { list_del_rcu(&trans->ctx.table->list); @@ -7056,7 +7623,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); } else { - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); nft_chain_del(trans->ctx.chain); nf_tables_unregister_hook(trans->ctx.net, trans->ctx.table, @@ -7064,34 +7631,37 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } break; case NFT_MSG_DELCHAIN: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, trans->ctx.chain); nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: - trans->ctx.chain->use--; + nft_use_dec_restore(&trans->ctx.chain->use); list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), NFT_TRANS_ABORT); break; case NFT_MSG_DELRULE: - trans->ctx.chain->use++; + nft_use_inc_restore(&trans->ctx.chain->use); nft_clear(trans->ctx.net, nft_trans_rule(trans)); nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; } + nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_set(trans)); + if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(&trans->ctx, nft_trans_set(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: @@ -7106,7 +7676,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; - nft_set_elem_activate(net, te->set, &te->elem); + nft_setelem_data_activate(net, te->set, &te->elem); te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; @@ -7117,23 +7687,23 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); nft_obj_del(nft_trans_obj(trans)); } break; case NFT_MSG_DELOBJ: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWFLOWTABLE: - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, nft_trans_flowtable(trans)); break; case NFT_MSG_DELFLOWTABLE: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); nft_trans_destroy(trans); break; @@ -7143,43 +7713,50 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) synchronize_rcu(); list_for_each_entry_safe_reverse(trans, next, - &net->nft.commit_list, list) { - list_del(&trans->list); + &nft_net->commit_list, list) { + nft_trans_list_del(trans); nf_tables_abort_release(trans); } - if (action == NFNL_ABORT_AUTOLOAD) - nf_tables_module_autoload(net); - else - nf_tables_module_autoload_cleanup(net); - - return 0; -} - -static void nf_tables_cleanup(struct net *net) -{ - nft_validate_state_update(net, NFT_VALIDATE_SKIP); + return err; } static int nf_tables_abort(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action) { - int ret = __nf_tables_abort(net, action); + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + unsigned int gc_seq; + int ret; + + gc_seq = nft_gc_seq_begin(nft_net); + ret = __nf_tables_abort(net, action); + nft_gc_seq_end(nft_net, gc_seq); + + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + /* module autoload needs to happen after GC sequence update because it + * temporarily releases and grabs mutex again. + */ + if (action == NFNL_ABORT_AUTOLOAD) + nf_tables_module_autoload(net); + else + nf_tables_module_autoload_cleanup(net); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return ret; } static bool nf_tables_valid_genid(struct net *net, u32 genid) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); bool genid_ok; - mutex_lock(&net->nft.commit_mutex); + mutex_lock(&nft_net->commit_mutex); - genid_ok = genid == 0 || net->nft.base_seq == genid; + genid_ok = genid == 0 || nft_net->base_seq == genid; if (!genid_ok) - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); /* else, commit mutex has to be released by commit or abort function */ return genid_ok; @@ -7192,7 +7769,6 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .cb = nf_tables_cb, .commit = nf_tables_commit, .abort = nf_tables_abort, - .cleanup = nf_tables_cleanup, .valid_genid = nf_tables_valid_genid, .owner = THIS_MODULE, }; @@ -7354,28 +7930,24 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) } EXPORT_SYMBOL_GPL(nft_parse_u32_check); -/** - * nft_parse_register - parse a register value from a netlink attribute - * - * @attr: netlink attribute - * - * Parse and translate a register value from a netlink attribute. - * Registers used to be 128 bit wide, these register numbers will be - * mapped to the corresponding 32 bit register numbers. - */ -unsigned int nft_parse_register(const struct nlattr *attr) +static int nft_parse_register(const struct nlattr *attr, u32 *preg) { unsigned int reg; reg = ntohl(nla_get_be32(attr)); switch (reg) { case NFT_REG_VERDICT...NFT_REG_4: - return reg * NFT_REG_SIZE / NFT_REG32_SIZE; + *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE; + break; + case NFT_REG32_00...NFT_REG32_15: + *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; + break; default: - return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; + return -ERANGE; } + + return 0; } -EXPORT_SYMBOL_GPL(nft_parse_register); /** * nft_dump_register - dump a register value to a netlink attribute @@ -7408,7 +7980,7 @@ EXPORT_SYMBOL_GPL(nft_dump_register); * Validate that the input register is one of the general purpose * registers and that the length of the load is within the bounds. */ -int nft_validate_register_load(enum nft_registers reg, unsigned int len) +static int nft_validate_register_load(enum nft_registers reg, unsigned int len) { if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) return -EINVAL; @@ -7419,7 +7991,24 @@ int nft_validate_register_load(enum nft_registers reg, unsigned int len) return 0; } -EXPORT_SYMBOL_GPL(nft_validate_register_load); + +int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len) +{ + u32 reg; + int err; + + err = nft_parse_register(attr, ®); + if (err < 0) + return err; + + err = nft_validate_register_load(reg, len); + if (err < 0) + return err; + + *sreg = reg; + return 0; +} +EXPORT_SYMBOL_GPL(nft_parse_register_load); /** * nft_validate_register_store - validate an expressions' register store @@ -7435,10 +8024,11 @@ EXPORT_SYMBOL_GPL(nft_validate_register_load); * A value of NULL for the data means that its runtime gathered * data. */ -int nft_validate_register_store(const struct nft_ctx *ctx, - enum nft_registers reg, - const struct nft_data *data, - enum nft_data_types type, unsigned int len) +static int nft_validate_register_store(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type, + unsigned int len) { int err; @@ -7470,7 +8060,27 @@ int nft_validate_register_store(const struct nft_ctx *ctx, return 0; } } -EXPORT_SYMBOL_GPL(nft_validate_register_store); + +int nft_parse_register_store(const struct nft_ctx *ctx, + const struct nlattr *attr, u8 *dreg, + const struct nft_data *data, + enum nft_data_types type, unsigned int len) +{ + int err; + u32 reg; + + err = nft_parse_register(attr, ®); + if (err < 0) + return err; + + err = nft_validate_register_store(ctx, reg, data, type, len); + if (err < 0) + return err; + + *dreg = reg; + return 0; +} +EXPORT_SYMBOL_GPL(nft_parse_register_store); static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { [NFTA_VERDICT_CODE] = { .type = NLA_U32 }, @@ -7493,19 +8103,16 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (!tb[NFTA_VERDICT_CODE]) return -EINVAL; + + /* zero padding hole for memcmp */ + memset(data, 0, sizeof(*data)); data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); switch (data->verdict.code) { - default: - switch (data->verdict.code & NF_VERDICT_MASK) { - case NF_ACCEPT: - case NF_DROP: - case NF_QUEUE: - break; - default: - return -EINVAL; - } - /* fall through */ + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + break; case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: @@ -7520,10 +8127,13 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, return PTR_ERR(chain); if (nft_is_base_chain(chain)) return -EOPNOTSUPP; + if (!nft_use_inc(&chain->use)) + return -EMFILE; - chain->use++; data->verdict.chain = chain; break; + default: + return -EINVAL; } desc->len = sizeof(data->verdict); @@ -7533,10 +8143,13 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, static void nft_verdict_uninit(const struct nft_data *data) { + struct nft_chain *chain; + switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - data->verdict.chain->use--; + chain = data->verdict.chain; + nft_use_dec(&chain->use); break; } } @@ -7690,32 +8303,40 @@ int __nft_release_basechain(struct nft_ctx *ctx) nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain); list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); - ctx->chain->use--; + nft_use_dec(&ctx->chain->use); nf_tables_rule_release(ctx, rule); } nft_chain_del(ctx->chain); - ctx->table->use--; + nft_use_dec(&ctx->table->use); nf_tables_chain_destroy(ctx); return 0; } EXPORT_SYMBOL_GPL(__nft_release_basechain); +static void __nft_release_hook(struct net *net, struct nft_table *table) +{ + struct nft_flowtable *flowtable; + struct nft_chain *chain; + + list_for_each_entry(chain, &table->chains, list) + __nf_tables_unregister_hook(net, table, chain, true); + list_for_each_entry(flowtable, &table->flowtables, list) + __nft_unregister_flowtable_net_hooks(net, flowtable, true); +} + static void __nft_release_hooks(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_table *table; - struct nft_chain *chain; - list_for_each_entry(table, &net->nft.tables, list) { - list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hook(net, table, chain); - } + list_for_each_entry(table, &nft_net->tables, list) + __nft_release_hook(net, table); } -static void __nft_release_tables(struct net *net) +static void __nft_release_table(struct net *net, struct nft_table *table) { struct nft_flowtable *flowtable, *nf; - struct nft_table *table, *nt; struct nft_chain *chain, *nc; struct nft_object *obj, *ne; struct nft_rule *rule, *nr; @@ -7725,75 +8346,114 @@ static void __nft_release_tables(struct net *net) .family = NFPROTO_NETDEV, }; - list_for_each_entry_safe(table, nt, &net->nft.tables, list) { - ctx.family = table->family; - ctx.table = table; - list_for_each_entry(chain, &table->chains, list) { - ctx.chain = chain; - list_for_each_entry_safe(rule, nr, &chain->rules, list) { - list_del(&rule->list); - chain->use--; - nf_tables_rule_release(&ctx, rule); - } - } - list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { - list_del(&flowtable->list); - table->use--; - nf_tables_flowtable_destroy(flowtable); - } - list_for_each_entry_safe(set, ns, &table->sets, list) { - list_del(&set->list); - table->use--; - nft_set_destroy(set); - } - list_for_each_entry_safe(obj, ne, &table->objects, list) { - nft_obj_del(obj); - table->use--; - nft_obj_destroy(&ctx, obj); - } - list_for_each_entry_safe(chain, nc, &table->chains, list) { - ctx.chain = chain; - nft_chain_del(chain); - table->use--; - nf_tables_chain_destroy(&ctx); + ctx.family = table->family; + ctx.table = table; + list_for_each_entry(chain, &table->chains, list) { + ctx.chain = chain; + list_for_each_entry_safe(rule, nr, &chain->rules, list) { + list_del(&rule->list); + nft_use_dec(&chain->use); + nf_tables_rule_release(&ctx, rule); } - list_del(&table->list); - nf_tables_table_destroy(&ctx); } + list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { + list_del(&flowtable->list); + nft_use_dec(&table->use); + nf_tables_flowtable_destroy(flowtable); + } + list_for_each_entry_safe(set, ns, &table->sets, list) { + list_del(&set->list); + nft_use_dec(&table->use); + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(&ctx, set); + + nft_set_destroy(&ctx, set); + } + list_for_each_entry_safe(obj, ne, &table->objects, list) { + nft_obj_del(obj); + nft_use_dec(&table->use); + nft_obj_destroy(&ctx, obj); + } + list_for_each_entry_safe(chain, nc, &table->chains, list) { + ctx.chain = chain; + nft_chain_del(chain); + nft_use_dec(&table->use); + nf_tables_chain_destroy(&ctx); + } + list_del(&table->list); + nf_tables_table_destroy(&ctx); +} + +static void __nft_release_tables(struct net *net) +{ + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + struct nft_table *table, *nt; + + list_for_each_entry_safe(table, nt, &nft_net->tables, list) + __nft_release_table(net, table); } static int __net_init nf_tables_init_net(struct net *net) { - INIT_LIST_HEAD(&net->nft.tables); - INIT_LIST_HEAD(&net->nft.commit_list); - INIT_LIST_HEAD(&net->nft.module_list); - mutex_init(&net->nft.commit_mutex); - net->nft.base_seq = 1; - net->nft.validate_state = NFT_VALIDATE_SKIP; + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + INIT_LIST_HEAD(&nft_net->tables); + INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->binding_list); + INIT_LIST_HEAD(&nft_net->module_list); + INIT_LIST_HEAD(&nft_net->notify_list); + mutex_init(&nft_net->commit_mutex); + nft_net->base_seq = 1; + nft_net->validate_state = NFT_VALIDATE_SKIP; + nft_net->gc_seq = 0; return 0; } static void __net_exit nf_tables_pre_exit_net(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + mutex_lock(&nft_net->commit_mutex); __nft_release_hooks(net); + mutex_unlock(&nft_net->commit_mutex); } static void __net_exit nf_tables_exit_net(struct net *net) { - mutex_lock(&net->nft.commit_mutex); - if (!list_empty(&net->nft.commit_list)) - __nf_tables_abort(net, NFNL_ABORT_NONE); + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + unsigned int gc_seq; + + mutex_lock(&nft_net->commit_mutex); + + gc_seq = nft_gc_seq_begin(nft_net); + + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + if (!list_empty(&nft_net->module_list)) + nf_tables_module_autoload_cleanup(net); + __nft_release_tables(net); - mutex_unlock(&net->nft.commit_mutex); - WARN_ON_ONCE(!list_empty(&net->nft.tables)); - WARN_ON_ONCE(!list_empty(&net->nft.module_list)); + + nft_gc_seq_end(nft_net, gc_seq); + + mutex_unlock(&nft_net->commit_mutex); + WARN_ON_ONCE(!list_empty(&nft_net->tables)); + WARN_ON_ONCE(!list_empty(&nft_net->module_list)); +} + +static void nf_tables_exit_batch(struct list_head *net_exit_list) +{ + flush_work(&trans_gc_work); } static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, .pre_exit = nf_tables_pre_exit_net, .exit = nf_tables_exit_net, + .exit_batch = nf_tables_exit_batch, + .id = &nf_tables_net_id, + .size = sizeof(struct nftables_pernet), }; static int __init nf_tables_module_init(void) @@ -7855,7 +8515,9 @@ static void __exit nf_tables_module_exit(void) unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); + nf_tables_trans_destroy_flush_work(); unregister_pernet_subsys(&nf_tables_net_ops); + cancel_work_sync(&trans_gc_work); cancel_work_sync(&trans_destroy_work); rcu_barrier(); rhltable_destroy(&nft_objname_ht); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 2d3bc22c855c..1e691eff1c40 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -7,6 +7,8 @@ #include <net/netfilter/nf_tables_offload.h> #include <net/pkt_cls.h> +extern unsigned int nf_tables_net_id; + static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions) { struct nft_flow_rule *flow; @@ -345,11 +347,12 @@ static int nft_flow_offload_chain(struct nft_chain *chain, int nft_flow_rule_offload_commit(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans; int err = 0; u8 policy; - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->ctx.family != NFPROTO_NETDEV) continue; @@ -400,7 +403,7 @@ int nft_flow_rule_offload_commit(struct net *net) break; } - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->ctx.family != NFPROTO_NETDEV) continue; @@ -419,14 +422,14 @@ int nft_flow_rule_offload_commit(struct net *net) return err; } -static struct nft_chain *__nft_offload_get_chain(struct net_device *dev) +static struct nft_chain *__nft_offload_get_chain(const struct nftables_pernet *nft_net, + struct net_device *dev) { struct nft_base_chain *basechain; - struct net *net = dev_net(dev); const struct nft_table *table; struct nft_chain *chain; - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { if (table->family != NFPROTO_NETDEV) continue; @@ -450,18 +453,20 @@ static void nft_indr_block_cb(struct net_device *dev, flow_indr_block_bind_cb_t *cb, void *cb_priv, enum flow_block_command cmd) { + struct nftables_pernet *nft_net; struct net *net = dev_net(dev); struct nft_chain *chain; - mutex_lock(&net->nft.commit_mutex); - chain = __nft_offload_get_chain(dev); + nft_net = net_generic(net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + chain = __nft_offload_get_chain(nft_net, dev); if (chain && chain->flags & NFT_CHAIN_HW_OFFLOAD) { struct nft_base_chain *basechain; basechain = nft_base_chain(chain); nft_indr_block_ing_cmd(dev, basechain, cb, cb_priv, cmd); } - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); } static void nft_offload_chain_clean(struct nft_chain *chain) @@ -480,17 +485,19 @@ static int nft_offload_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nftables_pernet *nft_net; struct net *net = dev_net(dev); struct nft_chain *chain; if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - mutex_lock(&net->nft.commit_mutex); - chain = __nft_offload_get_chain(dev); + nft_net = net_generic(net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + chain = __nft_offload_get_chain(nft_net, dev); if (chain) nft_offload_chain_clean(chain); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 87b36da5cd98..0cf3278007ba 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -183,7 +183,6 @@ static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info) void nft_trace_notify(struct nft_traceinfo *info) { const struct nft_pktinfo *pkt = info->pkt; - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int size; @@ -219,15 +218,11 @@ void nft_trace_notify(struct nft_traceinfo *info) return; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE); - nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0); + nlh = nfnl_msg_put(skb, 0, 0, event, 0, info->basechain->type->family, + NFNETLINK_V0, 0); if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = info->basechain->type->family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt)))) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 81c86a156c6c..3cff23c814c3 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -455,7 +455,8 @@ ack: * processed, this avoids that the same error is * reported several times when replaying the batch. */ - if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) { + if (err == -ENOMEM || + nfnl_err_add(&err_list, nlh, err, &extack) < 0) { /* We failed to enqueue an error, reset the * list of errors and send OOM to userspace * pointing to the batch header. @@ -512,8 +513,6 @@ done: goto replay_abort; } } - if (ss->cleanup) - ss->cleanup(net); nfnl_err_deliver(&err_list, oskb); kfree_skb(skb); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 2481470dec36..4b46421c5e17 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -132,21 +132,16 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_acct *acct) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; u64 pkts, bytes; u32 old_flags; event = nfnl_msg_type(NFNL_SUBSYS_ACCT, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_string(skb, NFACCT_NAME, acct->name)) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 3d5fc07b2530..cb9c2e955996 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -530,20 +530,15 @@ nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_conntrack_helper *helper) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; int status; event = nfnl_msg_type(NFNL_SUBSYS_CTHELPER, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_string(skb, NFCTH_NAME, helper->name)) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index da915c224a82..a854e1560cc1 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -160,22 +160,17 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct ctnl_timeout *timeout) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto; struct nlattr *nest_parms; int ret; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) || nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->timeout.l3num)) || @@ -382,21 +377,16 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, const unsigned int *timeouts) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; struct nlattr *nest_parms; int ret; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l3num)) || nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto)) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b36af4741ad3..80c09070ea9f 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -452,20 +452,15 @@ __build_packet_message(struct nfnl_log_net *log, { struct nfulnl_msg_packet_hdr pmsg; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; sk_buff_data_t old_tail = inst->skb->tail; struct sock *sk; const unsigned char *hwhdrp; - nlh = nlmsg_put(inst->skb, 0, 0, - nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET), - sizeof(struct nfgenmsg), 0); + nlh = nfnl_msg_put(inst->skb, 0, 0, + nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET), + 0, pf, NFNETLINK_V0, htons(inst->group_num)); if (!nlh) return -1; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = pf; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(inst->group_num); memset(&pmsg, 0, sizeof(pmsg)); pmsg.hw_protocol = skb->protocol; @@ -688,8 +683,8 @@ nfulnl_log_packet(struct net *net, unsigned int plen = 0; struct nfnl_log_net *log = nfnl_log_pernet(net); const struct nfnl_ct_hook *nfnl_ct = NULL; + enum ip_conntrack_info ctinfo = 0; struct nf_conn *ct = NULL; - enum ip_conntrack_info uninitialized_var(ctinfo); if (li_user && li_user->type == NF_LOG_TYPE_ULOG) li = li_user; diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 79fbf37291f3..573a372e760f 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -269,6 +269,7 @@ bool nf_osf_find(const struct sk_buff *skb, struct nf_osf_hdr_ctx ctx; const struct tcphdr *tcp; struct tcphdr _tcph; + bool found = false; memset(&ctx, 0, sizeof(ctx)); @@ -283,10 +284,11 @@ bool nf_osf_find(const struct sk_buff *skb, data->genre = f->genre; data->version = f->version; + found = true; break; } - return true; + return found; } EXPORT_SYMBOL_GPL(nf_osf_find); @@ -314,6 +316,14 @@ static int nfnl_osf_add_callback(struct net *net, struct sock *ctnl, f = nla_data(osf_attrs[OSF_ATTR_FINGER]); + if (f->opt_num > ARRAY_SIZE(f->opt)) + return -EINVAL; + + if (!memchr(f->genre, 0, MAXGENRELEN) || + !memchr(f->subtype, 0, MAXGENRELEN) || + !memchr(f->version, 0, MAXGENRELEN)) + return -EINVAL; + kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL); if (!kf) return -ENOMEM; @@ -438,3 +448,4 @@ module_init(nfnl_osf_init); module_exit(nfnl_osf_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7d3ab08a5a2d..772f8c69818c 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -383,12 +383,11 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nlattr *nla; struct nfqnl_msg_packet_hdr *pmsg; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct sk_buff *entskb = entry->skb; struct net_device *indev; struct net_device *outdev; struct nf_conn *ct = NULL; - enum ip_conntrack_info uninitialized_var(ctinfo); + enum ip_conntrack_info ctinfo; struct nfnl_ct_hook *nfnl_ct; bool csum_verify; char *secdata = NULL; @@ -469,18 +468,15 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, goto nlmsg_failure; } - nlh = nlmsg_put(skb, 0, 0, - nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET), - sizeof(struct nfgenmsg), 0); + nlh = nfnl_msg_put(skb, 0, 0, + nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET), + 0, entry->state.pf, NFNETLINK_V0, + htons(queue->queue_num)); if (!nlh) { skb_tx_error(entskb); kfree_skb(skb); goto nlmsg_failure; } - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = entry->state.pf; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(queue->queue_num); nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg)); pmsg = nla_data(nla); @@ -846,11 +842,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } static int -nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) +nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff) { struct sk_buff *nskb; if (diff < 0) { + unsigned int min_len = skb_transport_offset(e->skb); + + if (data_len < min_len) + return -EINVAL; + if (pskb_trim(e->skb, data_len)) return -ENOMEM; } else if (diff > 0) { @@ -1183,7 +1184,7 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, struct nfqnl_instance *queue; unsigned int verdict; struct nf_queue_entry *entry; - enum ip_conntrack_info uninitialized_var(ctinfo); + enum ip_conntrack_info ctinfo; struct nfnl_ct_hook *nfnl_ct; struct nf_conn *ct = NULL; struct nfnl_queue_net *q = nfnl_queue_pernet(net); diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 10e9d50e4e19..ccab2e66d754 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -16,8 +16,8 @@ #include <net/netfilter/nf_tables_offload.h> struct nft_bitwise { - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; u8 len; struct nft_data mask; struct nft_data xor; @@ -65,14 +65,14 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, priv->len = len; - priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); - err = nft_validate_register_load(priv->sreg, priv->len); + err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg, + priv->len); if (err < 0) return err; - priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + err = nft_parse_register_store(ctx, tb[NFTA_BITWISE_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); if (err < 0) return err; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 12bed3f7bbc6..9d250bd60bb8 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -16,8 +16,8 @@ #include <net/netfilter/nf_tables.h> struct nft_byteorder { - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; enum nft_byteorder_ops op:8; u8 len; u8 size; @@ -30,28 +30,29 @@ void nft_byteorder_eval(const struct nft_expr *expr, const struct nft_byteorder *priv = nft_expr_priv(expr); u32 *src = ®s->data[priv->sreg]; u32 *dst = ®s->data[priv->dreg]; - union { u32 u32; u16 u16; } *s, *d; + u16 *s16, *d16; unsigned int i; - s = (void *)src; - d = (void *)dst; + s16 = (void *)src; + d16 = (void *)dst; switch (priv->size) { case 8: { + u64 *dst64 = (void *)dst; u64 src64; switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 8; i++) { src64 = nft_reg_load64(&src[i]); - nft_reg_store64(&dst[i], be64_to_cpu(src64)); + nft_reg_store64(&dst64[i], be64_to_cpu(src64)); } break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 8; i++) { src64 = (__force __u64) cpu_to_be64(nft_reg_load64(&src[i])); - nft_reg_store64(&dst[i], src64); + nft_reg_store64(&dst64[i], src64); } break; } @@ -61,11 +62,11 @@ void nft_byteorder_eval(const struct nft_expr *expr, switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 4; i++) - d[i].u32 = ntohl((__force __be32)s[i].u32); + dst[i] = ntohl((__force __be32)src[i]); break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 4; i++) - d[i].u32 = (__force __u32)htonl(s[i].u32); + dst[i] = (__force __u32)htonl(src[i]); break; } break; @@ -73,11 +74,11 @@ void nft_byteorder_eval(const struct nft_expr *expr, switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 2; i++) - d[i].u16 = ntohs((__force __be16)s[i].u16); + d16[i] = ntohs((__force __be16)s16[i]); break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 2; i++) - d[i].u16 = (__force __u16)htons(s[i].u16); + d16[i] = (__force __u16)htons(s16[i]); break; } break; @@ -131,20 +132,20 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, return -EINVAL; } - priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]); err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len); if (err < 0) return err; priv->len = len; - err = nft_validate_register_load(priv->sreg, priv->len); + err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg, + priv->len); if (err < 0) return err; - priv->dreg = nft_parse_register(tb[NFTA_BYTEORDER_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); } static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index b5d5d071d765..916195dba6f7 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -2,6 +2,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/netfilter/nf_tables.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> @@ -10,6 +11,8 @@ #include <net/netfilter/nf_tables_ipv4.h> #include <net/netfilter/nf_tables_ipv6.h> +extern unsigned int nf_tables_net_id; + #ifdef CONFIG_NF_TABLES_IPV4 static unsigned int nft_do_chain_ipv4(void *priv, struct sk_buff *skb, @@ -293,6 +296,9 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, if (strcmp(basechain->dev_name, dev->name) != 0) return; + if (!basechain->ops.dev) + return; + /* UNREGISTER events are also happpening on netns exit. * * Altough nf_tables core releases all tables/chains, only @@ -315,6 +321,7 @@ static int nf_tables_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nftables_pernet *nft_net; struct nft_table *table; struct nft_chain *chain, *nr; struct nft_ctx ctx = { @@ -325,8 +332,9 @@ static int nf_tables_netdev_event(struct notifier_block *this, event != NETDEV_CHANGENAME) return NOTIFY_DONE; - mutex_lock(&ctx.net->nft.commit_mutex); - list_for_each_entry(table, &ctx.net->nft.tables, list) { + nft_net = net_generic(ctx.net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + list_for_each_entry(table, &nft_net->tables, list) { if (table->family != NFPROTO_NETDEV) continue; @@ -340,7 +348,7 @@ static int nf_tables_netdev_event(struct notifier_block *this, nft_netdev_event(event, dev, &ctx); } } - mutex_unlock(&ctx.net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index ae730dba60c8..a7c1e7c4381a 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -17,7 +17,7 @@ struct nft_cmp_expr { struct nft_data data; - enum nft_registers sreg:8; + u8 sreg; u8 len; enum nft_cmp_ops op:8; }; @@ -86,8 +86,7 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return err; } - priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]); - err = nft_validate_register_load(priv->sreg, desc.len); + err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; @@ -169,8 +168,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, if (err < 0) return err; - priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]); - err = nft_validate_register_load(priv->sreg, desc.len); + err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index bbe03b9a03b1..fdce5012a4f3 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -192,6 +192,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) { struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; + u32 l4proto; u32 flags; int err; @@ -204,12 +205,18 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) return -EINVAL; flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS])); - if (flags & ~NFT_RULE_COMPAT_F_MASK) + if (flags & NFT_RULE_COMPAT_F_UNUSED || + flags & ~NFT_RULE_COMPAT_F_MASK) return -EINVAL; if (flags & NFT_RULE_COMPAT_F_INV) *inv = true; - *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + if (l4proto > U16_MAX) + return -EINVAL; + + *proto = l4proto; + return 0; } @@ -327,6 +334,22 @@ static int nft_target_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); @@ -569,6 +592,22 @@ static int nft_match_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); @@ -591,19 +630,14 @@ nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int rev, int target) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; event = nfnl_msg_type(NFNL_SUBSYS_NFT_COMPAT, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_string(skb, NFTA_COMPAT_NAME, name) || nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) || nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target))) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 28991730728b..0c7f091d7d54 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -27,8 +27,8 @@ struct nft_ct { enum nft_ct_keys key:8; enum ip_conntrack_dir dir:8; union { - enum nft_registers dreg:8; - enum nft_registers sreg:8; + u8 dreg; + u8 sreg; }; }; @@ -481,6 +481,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, break; #endif case NFT_CT_ID: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL; + len = sizeof(u32); break; default: @@ -498,9 +501,8 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, } } - priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL, + NFT_DATA_VALUE, len); if (err < 0) return err; @@ -600,8 +602,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, } } - priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]); - err = nft_validate_register_load(priv->sreg, len); + err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len); if (err < 0) goto err1; @@ -1176,7 +1177,30 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_EXPECT_L3PROTO]) priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO])); + switch (priv->l3num) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + if (priv->l3num == ctx->family || ctx->family == NFPROTO_INET) + break; + + return -EINVAL; + case NFPROTO_INET: /* tuple.src.l3num supports NFPROTO_IPV4/6 only */ + default: + return -EAFNOSUPPORT; + } + priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]); + switch (priv->l4proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_DCCP: + case IPPROTO_SCTP: + break; + default: + return -EOPNOTSUPP; + } + priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]); priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]); priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]); diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index 6007089e1c2f..a5b560ee0337 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -14,7 +14,7 @@ #include <net/netfilter/nf_dup_netdev.h> struct nft_dup_netdev { - enum nft_registers sreg_dev:8; + u8 sreg_dev; }; static void nft_dup_netdev_eval(const struct nft_expr *expr, @@ -40,8 +40,8 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_DEV] == NULL) return -EINVAL; - priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); + return nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev, + sizeof(int)); } static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 6fdea0e57db8..e0c17217817d 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -11,13 +11,16 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> +#include <net/netns/generic.h> + +extern unsigned int nf_tables_net_id; struct nft_dynset { struct nft_set *set; struct nft_set_ext_tmpl tmpl; enum nft_dynset_ops op:8; - enum nft_registers sreg_key:8; - enum nft_registers sreg_data:8; + u8 sreg_key; + u8 sreg_data; bool invert; u64 timeout; struct nft_expr *expr; @@ -129,13 +132,14 @@ static int nft_dynset_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_dynset *priv = nft_expr_priv(expr); u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; u64 timeout; int err; - lockdep_assert_held(&ctx->net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); if (tb[NFTA_DYNSET_SET_NAME] == NULL || tb[NFTA_DYNSET_OP] == NULL || @@ -157,6 +161,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_OBJECT) + return -EOPNOTSUPP; + if (set->ops->update == NULL) return -EOPNOTSUPP; @@ -177,8 +184,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return err; } - priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); - err = nft_validate_register_load(priv->sreg_key, set->klen); + err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key, + set->klen); if (err < 0) return err; @@ -188,8 +195,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (set->dtype == NFT_DATA_VERDICT) return -EOPNOTSUPP; - priv->sreg_data = nft_parse_register(tb[NFTA_DYNSET_SREG_DATA]); - err = nft_validate_register_load(priv->sreg_data, set->dlen); + err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA], + &priv->sreg_data, set->dlen); if (err < 0) return err; } else if (set->flags & NFT_SET_MAP) @@ -204,9 +211,6 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return PTR_ERR(priv->expr); err = -EOPNOTSUPP; - if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL)) - goto err1; - if (priv->expr->ops->type->flags & NFT_EXPR_GC) { if (set->flags & NFT_SET_TIMEOUT) goto err1; @@ -262,7 +266,7 @@ static void nft_dynset_activate(const struct nft_ctx *ctx, { struct nft_dynset *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_dynset_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index faa0844c01fb..670dd146fb2b 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -19,8 +19,8 @@ struct nft_exthdr { u8 offset; u8 len; u8 op; - enum nft_registers dreg:8; - enum nft_registers sreg:8; + u8 dreg; + u8 sreg; u8 flags; }; @@ -353,12 +353,12 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; - priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]); priv->flags = flags; priv->op = op; - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + return nft_parse_register_store(ctx, tb[NFTA_EXTHDR_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); } static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, @@ -403,11 +403,11 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; - priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]); priv->flags = flags; priv->op = op; - return nft_validate_register_load(priv->sreg, priv->len); + return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg, + priv->len); } static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index cfac0964f48d..d2777aff5943 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -86,7 +86,6 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); - priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]); switch (priv->result) { case NFT_FIB_RESULT_OIF: @@ -106,8 +105,8 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; } - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + err = nft_parse_register_store(ctx, tb[NFTA_FIB_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); if (err < 0) return err; diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index ff5ac173e897..850d4e92702e 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -147,6 +147,11 @@ static int nft_flow_offload_validate(const struct nft_ctx *ctx, { unsigned int hook_mask = (1 << NF_INET_FORWARD); + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, hook_mask); } @@ -171,8 +176,10 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx, if (IS_ERR(flowtable)) return PTR_ERR(flowtable); + if (!nft_use_inc(&flowtable->use)) + return -EMFILE; + priv->flowtable = flowtable; - flowtable->use++; return nf_ct_netns_get(ctx->net, ctx->family); } @@ -191,7 +198,7 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx, { struct nft_flow_offload *priv = nft_expr_priv(expr); - priv->flowtable->use++; + nft_use_inc_restore(&priv->flowtable->use); } static void nft_flow_offload_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 3b0dcd170551..7730409f6f09 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -18,7 +18,7 @@ #include <net/ip.h> struct nft_fwd_netdev { - enum nft_registers sreg_dev:8; + u8 sreg_dev; }; static void nft_fwd_netdev_eval(const struct nft_expr *expr, @@ -50,8 +50,8 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx, if (tb[NFTA_FWD_SREG_DEV] == NULL) return -EINVAL; - priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); + return nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, + sizeof(int)); } static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -83,8 +83,8 @@ static bool nft_fwd_netdev_offload_action(const struct nft_expr *expr) } struct nft_fwd_neigh { - enum nft_registers sreg_dev:8; - enum nft_registers sreg_addr:8; + u8 sreg_dev; + u8 sreg_addr; u8 nfproto; }; @@ -162,8 +162,6 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx, !tb[NFTA_FWD_NFPROTO]) return -EINVAL; - priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]); - priv->sreg_addr = nft_parse_register(tb[NFTA_FWD_SREG_ADDR]); priv->nfproto = ntohl(nla_get_be32(tb[NFTA_FWD_NFPROTO])); switch (priv->nfproto) { @@ -177,11 +175,13 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - err = nft_validate_register_load(priv->sreg_dev, sizeof(int)); + err = nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, + sizeof(int)); if (err < 0) return err; - return nft_validate_register_load(priv->sreg_addr, addr_len); + return nft_parse_register_load(tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr, + addr_len); } static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index b836d550b919..2ff6c7759494 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -14,8 +14,8 @@ #include <linux/jhash.h> struct nft_jhash { - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; u8 len; bool autogen_seed:1; u32 modulus; @@ -38,7 +38,7 @@ static void nft_jhash_eval(const struct nft_expr *expr, } struct nft_symhash { - enum nft_registers dreg:8; + u8 dreg; u32 modulus; u32 offset; }; @@ -83,9 +83,6 @@ static int nft_jhash_init(const struct nft_ctx *ctx, if (tb[NFTA_HASH_OFFSET]) priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); - priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]); - priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); - err = nft_parse_u32_check(tb[NFTA_HASH_LEN], U8_MAX, &len); if (err < 0) return err; @@ -94,6 +91,10 @@ static int nft_jhash_init(const struct nft_ctx *ctx, priv->len = len; + err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len); + if (err < 0) + return err; + priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); if (priv->modulus < 1) return -ERANGE; @@ -108,9 +109,8 @@ static int nft_jhash_init(const struct nft_ctx *ctx, get_random_bytes(&priv->seed, sizeof(priv->seed)); } - return nft_validate_register_load(priv->sreg, len) && - nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_symhash_init(const struct nft_ctx *ctx, @@ -126,8 +126,6 @@ static int nft_symhash_init(const struct nft_ctx *ctx, if (tb[NFTA_HASH_OFFSET]) priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); - priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); - priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); if (priv->modulus < 1) return -ERANGE; @@ -135,8 +133,9 @@ static int nft_symhash_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + sizeof(u32)); } static int nft_jhash_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 98a8149be094..6a95d532eaec 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -48,9 +48,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx, priv->dlen = desc.len; - priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, &priv->data, - desc.type, desc.len); + err = nft_parse_register_store(ctx, tb[NFTA_IMMEDIATE_DREG], + &priv->dreg, &priv->data, desc.type, + desc.len); if (err < 0) goto err1; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 660bad688e2b..e0ffd463a132 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -17,8 +17,8 @@ struct nft_lookup { struct nft_set *set; - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; bool invert; struct nft_set_binding binding; }; @@ -73,8 +73,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (IS_ERR(set)) return PTR_ERR(set); - priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]); - err = nft_validate_register_load(priv->sreg, set->klen); + err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg, + set->klen); if (err < 0) return err; @@ -97,9 +97,9 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_MAP)) return -EINVAL; - priv->dreg = nft_parse_register(tb[NFTA_LOOKUP_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - set->dtype, set->dlen); + err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG], + &priv->dreg, NULL, set->dtype, + set->dlen); if (err < 0) return err; } else if (set->flags & NFT_SET_MAP) @@ -129,7 +129,7 @@ static void nft_lookup_activate(const struct nft_ctx *ctx, { struct nft_lookup *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_lookup_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 39dc94f2491e..c2f04885347e 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -15,8 +15,8 @@ struct nft_masq { u32 flags; - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; + u8 sreg_proto_min; + u8 sreg_proto_max; }; static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = { @@ -54,19 +54,15 @@ static int nft_masq_init(const struct nft_ctx *ctx, } if (tb[NFTA_MASQ_REG_PROTO_MIN]) { - priv->sreg_proto_min = - nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MIN]); - - err = nft_validate_register_load(priv->sreg_proto_min, plen); + err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_MASQ_REG_PROTO_MAX]) { - priv->sreg_proto_max = - nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MAX]); - - err = nft_validate_register_load(priv->sreg_proto_max, - plen); + err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MAX], + &priv->sreg_proto_max, + plen); if (err < 0) return err; } else { diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index dda1e55d5801..ac7d3c78501b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -247,7 +247,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ); break; case NFT_META_TIME_NS: - nft_reg_store64(dest, ktime_get_real_ns()); + nft_reg_store64((u64 *)dest, ktime_get_real_ns()); break; case NFT_META_TIME_DAY: nft_reg_store8(dest, nft_meta_weekday(ktime_get_real_seconds())); @@ -380,9 +380,8 @@ int nft_meta_get_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_META_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } EXPORT_SYMBOL_GPL(nft_meta_get_init); @@ -475,8 +474,7 @@ int nft_meta_set_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->sreg = nft_parse_register(tb[NFTA_META_SREG]); - err = nft_validate_register_load(priv->sreg, len); + err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len); if (err < 0) return err; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 17c0f75dfcdb..69fcea16218b 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -21,10 +21,10 @@ #include <net/ip.h> struct nft_nat { - enum nft_registers sreg_addr_min:8; - enum nft_registers sreg_addr_max:8; - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; + u8 sreg_addr_min; + u8 sreg_addr_max; + u8 sreg_proto_min; + u8 sreg_proto_max; enum nf_nat_manip_type type:8; u8 family; u16 flags; @@ -88,6 +88,11 @@ static int nft_nat_validate(const struct nft_ctx *ctx, struct nft_nat *priv = nft_expr_priv(expr); int err; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); if (err < 0) return err; @@ -154,18 +159,15 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->family = family; if (tb[NFTA_NAT_REG_ADDR_MIN]) { - priv->sreg_addr_min = - nft_parse_register(tb[NFTA_NAT_REG_ADDR_MIN]); - err = nft_validate_register_load(priv->sreg_addr_min, alen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MIN], + &priv->sreg_addr_min, alen); if (err < 0) return err; if (tb[NFTA_NAT_REG_ADDR_MAX]) { - priv->sreg_addr_max = - nft_parse_register(tb[NFTA_NAT_REG_ADDR_MAX]); - - err = nft_validate_register_load(priv->sreg_addr_max, - alen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MAX], + &priv->sreg_addr_max, + alen); if (err < 0) return err; } else { @@ -175,19 +177,15 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all); if (tb[NFTA_NAT_REG_PROTO_MIN]) { - priv->sreg_proto_min = - nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]); - - err = nft_validate_register_load(priv->sreg_proto_min, plen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_NAT_REG_PROTO_MAX]) { - priv->sreg_proto_max = - nft_parse_register(tb[NFTA_NAT_REG_PROTO_MAX]); - - err = nft_validate_register_load(priv->sreg_proto_max, - plen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MAX], + &priv->sreg_proto_max, + plen); if (err < 0) return err; } else { @@ -283,7 +281,8 @@ static void nft_nat_inet_eval(const struct nft_expr *expr, { const struct nft_nat *priv = nft_expr_priv(expr); - if (priv->family == nft_pf(pkt)) + if (priv->family == nft_pf(pkt) || + priv->family == NFPROTO_INET) nft_nat_eval(expr, regs, pkt); } diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 48edb9d5f012..7bbca252e7fc 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -16,7 +16,7 @@ static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state); struct nft_ng_inc { - enum nft_registers dreg:8; + u8 dreg; u32 modulus; atomic_t counter; u32 offset; @@ -66,11 +66,10 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); atomic_set(&priv->counter, priv->modulus - 1); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, @@ -100,7 +99,7 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) } struct nft_ng_random { - enum nft_registers dreg:8; + u8 dreg; u32 modulus; u32 offset; }; @@ -140,10 +139,8 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, prandom_init_once(&nft_numgen_prandom_state); - priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); - - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index bfd18d2b65a2..f5028479c028 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -41,8 +41,10 @@ static int nft_objref_init(const struct nft_ctx *ctx, if (IS_ERR(obj)) return -ENOENT; + if (!nft_use_inc(&obj->use)) + return -EMFILE; + nft_objref_priv(expr) = obj; - obj->use++; return 0; } @@ -71,7 +73,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx, if (phase == NFT_TRANS_COMMIT) return; - obj->use--; + nft_use_dec(&obj->use); } static void nft_objref_activate(const struct nft_ctx *ctx, @@ -79,7 +81,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx, { struct nft_object *obj = nft_objref_priv(expr); - obj->use++; + nft_use_inc_restore(&obj->use); } static struct nft_expr_type nft_objref_type; @@ -95,7 +97,7 @@ static const struct nft_expr_ops nft_objref_ops = { struct nft_objref_map { struct nft_set *set; - enum nft_registers sreg:8; + u8 sreg; struct nft_set_binding binding; }; @@ -137,8 +139,8 @@ static int nft_objref_map_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_OBJECT)) return -EINVAL; - priv->sreg = nft_parse_register(tb[NFTA_OBJREF_SET_SREG]); - err = nft_validate_register_load(priv->sreg, set->klen); + err = nft_parse_register_load(tb[NFTA_OBJREF_SET_SREG], &priv->sreg, + set->klen); if (err < 0) return err; @@ -180,7 +182,7 @@ static void nft_objref_map_activate(const struct nft_ctx *ctx, { struct nft_objref_map *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_objref_map_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 4911f8eb394f..b7c2bc01f8a2 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -6,7 +6,7 @@ #include <linux/netfilter/nfnetlink_osf.h> struct nft_osf { - enum nft_registers dreg:8; + u8 dreg; u8 ttl; u32 flags; }; @@ -83,9 +83,9 @@ static int nft_osf_init(const struct nft_ctx *ctx, priv->flags = flags; } - priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN); + err = nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, + NFT_OSF_MAXGENRELEN); if (err < 0) return err; @@ -115,9 +115,21 @@ static int nft_osf_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { - return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | - (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_FORWARD)); + unsigned int hooks; + + switch (ctx->family) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_INET: + hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_FORWARD); + break; + default: + return -EOPNOTSUPP; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); } static struct nft_expr_type nft_osf_type; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index cf0512fc648e..a4f9a150812a 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -84,7 +84,7 @@ void nft_payload_eval(const struct nft_expr *expr, switch (priv->base) { case NFT_PAYLOAD_LL_HEADER: - if (!skb_mac_header_was_set(skb)) + if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) == 0) goto err; if (skb_vlan_tag_present(skb)) { @@ -135,10 +135,10 @@ static int nft_payload_init(const struct nft_ctx *ctx, priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); - priv->dreg = nft_parse_register(tb[NFTA_PAYLOAD_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + return nft_parse_register_store(ctx, tb[NFTA_PAYLOAD_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); } static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -558,18 +558,23 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_payload_set *priv = nft_expr_priv(expr); + u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE; + int err; priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); - priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]); if (tb[NFTA_PAYLOAD_CSUM_TYPE]) - priv->csum_type = - ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); - if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) - priv->csum_offset = - ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET])); + csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); + if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) { + err = nft_parse_u32_check(tb[NFTA_PAYLOAD_CSUM_OFFSET], U8_MAX, + &csum_offset); + if (err < 0) + return err; + + priv->csum_offset = csum_offset; + } if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) { u32 flags; @@ -580,15 +585,17 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, priv->csum_flags = flags; } - switch (priv->csum_type) { + switch (csum_type) { case NFT_PAYLOAD_CSUM_NONE: case NFT_PAYLOAD_CSUM_INET: break; default: return -EOPNOTSUPP; } + priv->csum_type = csum_type; - return nft_validate_register_load(priv->sreg, priv->len); + return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg, + priv->len); } static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -624,6 +631,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, { enum nft_payload_bases base; unsigned int offset, len; + int err; if (tb[NFTA_PAYLOAD_BASE] == NULL || tb[NFTA_PAYLOAD_OFFSET] == NULL || @@ -649,8 +657,13 @@ nft_payload_select_ops(const struct nft_ctx *ctx, if (tb[NFTA_PAYLOAD_DREG] == NULL) return ERR_PTR(-EINVAL); - offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); - len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset); + if (err < 0) + return ERR_PTR(err); + + err = nft_parse_u32_check(tb[NFTA_PAYLOAD_LEN], U8_MAX, &len); + if (err < 0) + return ERR_PTR(err); if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER) diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 5ece0a6aa8c3..94a4f0a5a28e 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -19,10 +19,10 @@ static u32 jhash_initval __read_mostly; struct nft_queue { - enum nft_registers sreg_qnum:8; - u16 queuenum; - u16 queues_total; - u16 flags; + u8 sreg_qnum; + u16 queuenum; + u16 queues_total; + u16 flags; }; static void nft_queue_eval(const struct nft_expr *expr, @@ -111,8 +111,8 @@ static int nft_queue_sreg_init(const struct nft_ctx *ctx, struct nft_queue *priv = nft_expr_priv(expr); int err; - priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]); - err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32)); + err = nft_parse_register_load(tb[NFTA_QUEUE_SREG_QNUM], + &priv->sreg_qnum, sizeof(u32)); if (err < 0) return err; diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 89efcc5a533d..e4a1c44d7f51 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -15,7 +15,7 @@ struct nft_range_expr { struct nft_data data_from; struct nft_data data_to; - enum nft_registers sreg:8; + u8 sreg; u8 len; enum nft_range_ops op:8; }; @@ -86,8 +86,8 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr goto err2; } - priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]); - err = nft_validate_register_load(priv->sreg, desc_from.len); + err = nft_parse_register_load(tb[NFTA_RANGE_SREG], &priv->sreg, + desc_from.len); if (err < 0) goto err2; diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 43eeb1f609f1..7179dada5757 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -14,8 +14,8 @@ #include <net/netfilter/nf_tables.h> struct nft_redir { - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; + u8 sreg_proto_min; + u8 sreg_proto_max; u16 flags; }; @@ -50,24 +50,22 @@ static int nft_redir_init(const struct nft_ctx *ctx, plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { - priv->sreg_proto_min = - nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]); - - err = nft_validate_register_load(priv->sreg_proto_min, plen); + err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_REDIR_REG_PROTO_MAX]) { - priv->sreg_proto_max = - nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MAX]); - - err = nft_validate_register_load(priv->sreg_proto_max, - plen); + err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MAX], + &priv->sreg_proto_max, + plen); if (err < 0) return err; } else { priv->sreg_proto_max = priv->sreg_proto_min; } + + priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } if (tb[NFTA_REDIR_FLAGS]) { @@ -102,25 +100,37 @@ nla_put_failure: return -1; } -static void nft_redir_ipv4_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_redir_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_redir *priv = nft_expr_priv(expr); - struct nf_nat_ipv4_multi_range_compat mr; + const struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_range2 range; - memset(&mr, 0, sizeof(mr)); + memset(&range, 0, sizeof(range)); + range.flags = priv->flags; if (priv->sreg_proto_min) { - mr.range[0].min.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - mr.range[0].max.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto.all = (__force __be16) + nft_reg_load16(®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16) + nft_reg_load16(®s->data[priv->sreg_proto_max]); } - mr.range[0].flags |= priv->flags; - - regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt)); + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &range, + nft_hook(pkt)); + break; +#ifdef CONFIG_NF_TABLES_IPV6 + case NFPROTO_IPV6: + regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, + nft_hook(pkt)); + break; +#endif + default: + WARN_ON_ONCE(1); + break; + } } static void @@ -133,7 +143,7 @@ static struct nft_expr_type nft_redir_ipv4_type; static const struct nft_expr_ops nft_redir_ipv4_ops = { .type = &nft_redir_ipv4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_ipv4_eval, + .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_ipv4_destroy, .dump = nft_redir_dump, @@ -150,28 +160,6 @@ static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { }; #ifdef CONFIG_NF_TABLES_IPV6 -static void nft_redir_ipv6_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_redir *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - - range.flags |= priv->flags; - - regs->verdict.code = - nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); -} - static void nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -182,7 +170,7 @@ static struct nft_expr_type nft_redir_ipv6_type; static const struct nft_expr_ops nft_redir_ipv6_ops = { .type = &nft_redir_ipv6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_ipv6_eval, + .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_ipv6_destroy, .dump = nft_redir_dump, @@ -200,20 +188,6 @@ static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { #endif #ifdef CONFIG_NF_TABLES_INET -static void nft_redir_inet_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - switch (nft_pf(pkt)) { - case NFPROTO_IPV4: - return nft_redir_ipv4_eval(expr, regs, pkt); - case NFPROTO_IPV6: - return nft_redir_ipv6_eval(expr, regs, pkt); - } - - WARN_ON_ONCE(1); -} - static void nft_redir_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -224,7 +198,7 @@ static struct nft_expr_type nft_redir_inet_type; static const struct nft_expr_ops nft_redir_inet_ops = { .type = &nft_redir_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_inet_eval, + .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_inet_destroy, .dump = nft_redir_dump, @@ -236,7 +210,7 @@ static struct nft_expr_type nft_redir_inet_type __read_mostly = { .name = "redir", .ops = &nft_redir_inet_ops, .policy = nft_redir_policy, - .maxattr = NFTA_MASQ_MAX, + .maxattr = NFTA_REDIR_MAX, .owner = THIS_MODULE, }; diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 7cfcb0e2f7ee..f4a96164a5a1 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -15,7 +15,7 @@ struct nft_rt { enum nft_rt_keys key:8; - enum nft_registers dreg:8; + u8 dreg; }; static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst) @@ -141,9 +141,8 @@ static int nft_rt_get_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_RT_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static int nft_rt_get_dump(struct sk_buff *skb, @@ -167,6 +166,11 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp const struct nft_rt *priv = nft_expr_priv(expr); unsigned int hooks; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + switch (priv->key) { case NFT_RT_NEXTHOP4: case NFT_RT_NEXTHOP6: diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 087a056e34d1..b0f6b1490e1a 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -270,13 +270,14 @@ static int nft_bitmap_init(const struct nft_set *set, return 0; } -static void nft_bitmap_destroy(const struct nft_set *set) +static void nft_bitmap_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be, *n; list_for_each_entry_safe(be, n, &priv->list, head) - nft_set_elem_destroy(set, be, true); + nf_tables_set_elem_destroy(ctx, set, be); } static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 9de0eb20e954..0581d5499c5a 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -17,6 +17,9 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> +#include <net/netns/generic.h> + +extern unsigned int nf_tables_net_id; /* We target a hash table size of 4, element hint is 75% of final size */ #define NFT_RHASH_ELEMENT_HINT 3 @@ -59,6 +62,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg, if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) return 1; + if (nft_set_elem_is_dead(&he->ext)) + return 1; if (nft_set_elem_expired(&he->ext)) return 1; if (!nft_set_elem_active(&he->ext, x->genmask)) @@ -142,6 +147,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, /* Another cpu may race to insert the element with the same key */ if (prev) { nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); he = prev; } @@ -151,6 +157,7 @@ out: err2: nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); err1: return false; } @@ -185,7 +192,6 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set, struct nft_rhash_elem *he = elem->priv; nft_set_elem_change_active(net, set, &he->ext); - nft_set_elem_clear_busy(&he->ext); } static bool nft_rhash_flush(const struct net *net, @@ -193,12 +199,9 @@ static bool nft_rhash_flush(const struct net *net, { struct nft_rhash_elem *he = priv; - if (!nft_set_elem_mark_busy(&he->ext) || - !nft_is_active(net, &he->ext)) { - nft_set_elem_change_active(net, set, &he->ext); - return true; - } - return false; + nft_set_elem_change_active(net, set, &he->ext); + + return true; } static void *nft_rhash_deactivate(const struct net *net, @@ -215,9 +218,8 @@ static void *nft_rhash_deactivate(const struct net *net, rcu_read_lock(); he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); - if (he != NULL && - !nft_rhash_flush(net, set, he)) - he = NULL; + if (he) + nft_set_elem_change_active(net, set, &he->ext); rcu_read_unlock(); @@ -249,7 +251,9 @@ static bool nft_rhash_delete(const struct nft_set *set, if (he == NULL) return false; - return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0; + nft_set_elem_dead(&he->ext); + + return true; } static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, @@ -275,8 +279,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (iter->count < iter->skip) goto cont; - if (nft_set_elem_expired(&he->ext)) - goto cont; if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; @@ -295,49 +297,77 @@ cont: static void nft_rhash_gc(struct work_struct *work) { + struct nftables_pernet *nft_net; struct nft_set *set; struct nft_rhash_elem *he; struct nft_rhash *priv; - struct nft_set_gc_batch *gcb = NULL; struct rhashtable_iter hti; + struct nft_trans_gc *gc; + struct net *net; + u32 gc_seq; priv = container_of(work, struct nft_rhash, gc_work.work); set = nft_set_container_of(priv); + net = read_pnet(&set->net); + nft_net = net_generic(net, nf_tables_net_id); + gc_seq = READ_ONCE(nft_net->gc_seq); + + if (nft_set_gc_is_pending(set)) + goto done; + + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + if (!gc) + goto done; rhashtable_walk_enter(&priv->ht, &hti); rhashtable_walk_start(&hti); while ((he = rhashtable_walk_next(&hti))) { if (IS_ERR(he)) { - if (PTR_ERR(he) != -EAGAIN) - break; - continue; + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; + } + + /* Ruleset has been updated, try later. */ + if (READ_ONCE(nft_net->gc_seq) != gc_seq) { + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; } + if (nft_set_elem_is_dead(&he->ext)) + goto dead_elem; + if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPR)) { struct nft_expr *expr = nft_set_ext_expr(&he->ext); if (expr->ops->gc && expr->ops->gc(read_pnet(&set->net), expr)) - goto gc; + goto needs_gc_run; } if (!nft_set_elem_expired(&he->ext)) continue; -gc: - if (nft_set_elem_mark_busy(&he->ext)) - continue; - gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); - if (gcb == NULL) - break; - rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, he); +needs_gc_run: + nft_set_elem_dead(&he->ext); +dead_elem: + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) + goto try_later; + + nft_trans_gc_elem_add(gc, he); } + +try_later: + /* catchall list iteration requires rcu read side lock. */ rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); - nft_set_gc_batch_complete(gcb); + if (gc) + nft_trans_gc_queue_async_done(gc); + +done: queue_delayed_work(system_power_efficient_wq, &priv->gc_work, nft_set_gc_interval(set)); } @@ -372,25 +402,36 @@ static int nft_rhash_init(const struct nft_set *set, return err; INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc); - if (set->flags & NFT_SET_TIMEOUT) + if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL)) nft_rhash_gc_init(set); return 0; } +struct nft_rhash_ctx { + const struct nft_ctx ctx; + const struct nft_set *set; +}; + static void nft_rhash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy(arg, ptr, true); + struct nft_rhash_ctx *rhash_ctx = arg; + + nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr); } -static void nft_rhash_destroy(const struct nft_set *set) +static void nft_rhash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_ctx rhash_ctx = { + .ctx = *ctx, + .set = set, + }; cancel_delayed_work_sync(&priv->gc_work); - rcu_barrier(); rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, - (void *)set); + (void *)&rhash_ctx); } /* Number of buckets is stored in u32, so cap our result to 1U<<31 */ @@ -619,7 +660,8 @@ static int nft_hash_init(const struct nft_set *set, return 0; } -static void nft_hash_destroy(const struct nft_set *set) +static void nft_hash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; @@ -629,7 +671,7 @@ static void nft_hash_destroy(const struct nft_set *set) for (i = 0; i < priv->buckets; i++) { hlist_for_each_entry_safe(he, next, &priv->table[i], node) { hlist_del_rcu(&he->node); - nft_set_elem_destroy(set, he, true); + nf_tables_set_elem_destroy(ctx, set, he); } } } diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index ee7c29e0a9d7..2b5f65a424b7 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -14,6 +14,9 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> +#include <net/netns/generic.h> + +extern unsigned int nf_tables_net_id; struct nft_rbtree { struct rb_root root; @@ -38,10 +41,18 @@ static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe) return !nft_rbtree_interval_end(rbe); } -static bool nft_rbtree_equal(const struct nft_set *set, const void *this, - const struct nft_rbtree_elem *interval) +static int nft_rbtree_cmp(const struct nft_set *set, + const struct nft_rbtree_elem *e1, + const struct nft_rbtree_elem *e2) +{ + return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext), + set->klen); +} + +static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe) { - return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; + return nft_set_elem_expired(&rbe->ext) || + nft_set_elem_is_dead(&rbe->ext); } static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, @@ -52,7 +63,6 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set const struct nft_rbtree_elem *rbe, *interval = NULL; u8 genmask = nft_genmask_cur(net); const struct rb_node *parent; - const void *this; int d; parent = rcu_dereference_raw(priv->root.rb_node); @@ -62,12 +72,11 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set rbe = rb_entry(parent, struct nft_rbtree_elem, node); - this = nft_set_ext_key(&rbe->ext); - d = memcmp(this, key, set->klen); + d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); if (d < 0) { parent = rcu_dereference_raw(parent->rb_left); if (interval && - nft_rbtree_equal(set, this, interval) && + !nft_rbtree_cmp(set, rbe, interval) && nft_rbtree_interval_end(rbe) && nft_rbtree_interval_start(interval)) continue; @@ -80,7 +89,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set continue; } - if (nft_set_elem_expired(&rbe->ext)) + if (nft_rbtree_elem_expired(rbe)) return false; if (nft_rbtree_interval_end(rbe)) { @@ -98,7 +107,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && - !nft_set_elem_expired(&interval->ext) && + !nft_rbtree_elem_expired(interval) && nft_rbtree_interval_start(interval)) { *ext = &interval->ext; return true; @@ -214,43 +223,257 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, return rbe; } +static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, + struct nft_rbtree *priv, + struct nft_rbtree_elem *rbe) +{ + struct nft_set_elem elem = { + .priv = rbe, + }; + + nft_setelem_data_deactivate(net, set, &elem); + rb_erase(&rbe->node, &priv->root); +} + +static int nft_rbtree_gc_elem(const struct nft_set *__set, + struct nft_rbtree *priv, + struct nft_rbtree_elem *rbe) +{ + struct nft_set *set = (struct nft_set *)__set; + struct rb_node *prev = rb_prev(&rbe->node); + struct net *net = read_pnet(&set->net); + struct nft_rbtree_elem *rbe_prev; + struct nft_trans_gc *gc; + + gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC); + if (!gc) + return -ENOMEM; + + /* search for end interval coming before this element. + * end intervals don't carry a timeout extension, they + * are coupled with the interval start element. + */ + while (prev) { + rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); + if (nft_rbtree_interval_end(rbe_prev) && + nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY)) + break; + + prev = rb_prev(prev); + } + + if (prev) { + rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); + nft_rbtree_gc_remove(net, set, priv, rbe_prev); + + /* There is always room in this trans gc for this element, + * memory allocation never actually happens, hence, the warning + * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT, + * this is synchronous gc which never fails. + */ + gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); + if (WARN_ON_ONCE(!gc)) + return -ENOMEM; + + nft_trans_gc_elem_add(gc, rbe_prev); + } + + nft_rbtree_gc_remove(net, set, priv, rbe); + gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); + if (WARN_ON_ONCE(!gc)) + return -ENOMEM; + + nft_trans_gc_elem_add(gc, rbe); + + nft_trans_gc_queue_sync_done(gc); + + return 0; +} + +static bool nft_rbtree_update_first(const struct nft_set *set, + struct nft_rbtree_elem *rbe, + struct rb_node *first) +{ + struct nft_rbtree_elem *first_elem; + + first_elem = rb_entry(first, struct nft_rbtree_elem, node); + /* this element is closest to where the new element is to be inserted: + * update the first element for the node list path. + */ + if (nft_rbtree_cmp(set, rbe, first_elem) < 0) + return true; + + return false; +} + static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, struct nft_set_ext **ext) { + struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; + struct rb_node *node, *next, *parent, **p, *first = NULL; struct nft_rbtree *priv = nft_set_priv(set); + u8 cur_genmask = nft_genmask_cur(net); u8 genmask = nft_genmask_next(net); - struct nft_rbtree_elem *rbe; - struct rb_node *parent, **p; - int d; + int d, err; + /* Descend the tree to search for an existing element greater than the + * key value to insert that is greater than the new element. This is the + * first element to walk the ordered elements to find possible overlap. + */ parent = NULL; p = &priv->root.rb_node; while (*p != NULL) { parent = *p; rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = memcmp(nft_set_ext_key(&rbe->ext), - nft_set_ext_key(&new->ext), - set->klen); - if (d < 0) + d = nft_rbtree_cmp(set, rbe, new); + + if (d < 0) { p = &parent->rb_left; - else if (d > 0) + } else if (d > 0) { + if (!first || + nft_rbtree_update_first(set, rbe, first)) + first = &rbe->node; + p = &parent->rb_right; - else { - if (nft_rbtree_interval_end(rbe) && - nft_rbtree_interval_start(new)) { + } else { + if (nft_rbtree_interval_end(rbe)) p = &parent->rb_left; - } else if (nft_rbtree_interval_start(rbe) && - nft_rbtree_interval_end(new)) { + else p = &parent->rb_right; - } else if (nft_set_elem_active(&rbe->ext, genmask)) { - *ext = &rbe->ext; - return -EEXIST; - } else { - p = &parent->rb_left; + } + } + + if (!first) + first = rb_first(&priv->root); + + /* Detect overlap by going through the list of valid tree nodes. + * Values stored in the tree are in reversed order, starting from + * highest to lowest value. + */ + for (node = first; node != NULL; node = next) { + next = rb_next(node); + + rbe = rb_entry(node, struct nft_rbtree_elem, node); + + if (!nft_set_elem_active(&rbe->ext, genmask)) + continue; + + /* perform garbage collection to avoid bogus overlap reports + * but skip new elements in this transaction. + */ + if (nft_set_elem_expired(&rbe->ext) && + nft_set_elem_active(&rbe->ext, cur_genmask)) { + err = nft_rbtree_gc_elem(set, priv, rbe); + if (err < 0) + return err; + + continue; + } + + d = nft_rbtree_cmp(set, rbe, new); + if (d == 0) { + /* Matching end element: no need to look for an + * overlapping greater or equal element. + */ + if (nft_rbtree_interval_end(rbe)) { + rbe_le = rbe; + break; + } + + /* first element that is greater or equal to key value. */ + if (!rbe_ge) { + rbe_ge = rbe; + continue; + } + + /* this is a closer more or equal element, update it. */ + if (nft_rbtree_cmp(set, rbe_ge, new) != 0) { + rbe_ge = rbe; + continue; + } + + /* element is equal to key value, make sure flags are + * the same, an existing more or equal start element + * must not be replaced by more or equal end element. + */ + if ((nft_rbtree_interval_start(new) && + nft_rbtree_interval_start(rbe_ge)) || + (nft_rbtree_interval_end(new) && + nft_rbtree_interval_end(rbe_ge))) { + rbe_ge = rbe; + continue; } + } else if (d > 0) { + /* annotate element greater than the new element. */ + rbe_ge = rbe; + continue; + } else if (d < 0) { + /* annotate element less than the new element. */ + rbe_le = rbe; + break; } } + + /* - new start element matching existing start element: full overlap + * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. + */ + if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && + nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { + *ext = &rbe_ge->ext; + return -EEXIST; + } + + /* - new end element matching existing end element: full overlap + * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. + */ + if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) && + nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) { + *ext = &rbe_le->ext; + return -EEXIST; + } + + /* - new start element with existing closest, less or equal key value + * being a start element: partial overlap, reported as -ENOTEMPTY. + * Anonymous sets allow for two consecutive start element since they + * are constant, skip them to avoid bogus overlap reports. + */ + if (!nft_set_is_anonymous(set) && rbe_le && + nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new)) + return -ENOTEMPTY; + + /* - new end element with existing closest, less or equal key value + * being a end element: partial overlap, reported as -ENOTEMPTY. + */ + if (rbe_le && + nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new)) + return -ENOTEMPTY; + + /* - new end element with existing closest, greater or equal key value + * being an end element: partial overlap, reported as -ENOTEMPTY + */ + if (rbe_ge && + nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new)) + return -ENOTEMPTY; + + /* Accepted element: pick insertion point depending on key value */ + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + d = nft_rbtree_cmp(set, rbe, new); + + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + else if (nft_rbtree_interval_end(rbe)) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + rb_link_node_rcu(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); return 0; @@ -294,7 +517,6 @@ static void nft_rbtree_activate(const struct net *net, struct nft_rbtree_elem *rbe = elem->priv; nft_set_elem_change_active(net, set, &rbe->ext); - nft_set_elem_clear_busy(&rbe->ext); } static bool nft_rbtree_flush(const struct net *net, @@ -302,12 +524,9 @@ static bool nft_rbtree_flush(const struct net *net, { struct nft_rbtree_elem *rbe = priv; - if (!nft_set_elem_mark_busy(&rbe->ext) || - !nft_is_active(net, &rbe->ext)) { - nft_set_elem_change_active(net, set, &rbe->ext); - return true; - } - return false; + nft_set_elem_change_active(net, set, &rbe->ext); + + return true; } static void *nft_rbtree_deactivate(const struct net *net, @@ -338,6 +557,8 @@ static void *nft_rbtree_deactivate(const struct net *net, nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; + } else if (nft_set_elem_expired(&rbe->ext)) { + break; } else if (!nft_set_elem_active(&rbe->ext, genmask)) { parent = parent->rb_left; continue; @@ -364,8 +585,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (iter->count < iter->skip) goto cont; - if (nft_set_elem_expired(&rbe->ext)) - goto cont; if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; @@ -384,58 +603,82 @@ cont: static void nft_rbtree_gc(struct work_struct *work) { - struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL; - struct nft_set_gc_batch *gcb = NULL; + struct nft_rbtree_elem *rbe, *rbe_end = NULL; + struct nftables_pernet *nft_net; struct nft_rbtree *priv; + struct nft_trans_gc *gc; struct rb_node *node; struct nft_set *set; + unsigned int gc_seq; + struct net *net; priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); + net = read_pnet(&set->net); + nft_net = net_generic(net, nf_tables_net_id); + gc_seq = READ_ONCE(nft_net->gc_seq); - write_lock_bh(&priv->lock); - write_seqcount_begin(&priv->count); + if (nft_set_gc_is_pending(set)) + goto done; + + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + if (!gc) + goto done; + + read_lock_bh(&priv->lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + + /* Ruleset has been updated, try later. */ + if (READ_ONCE(nft_net->gc_seq) != gc_seq) { + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; + } + rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (nft_set_elem_is_dead(&rbe->ext)) + goto dead_elem; + + /* elements are reversed in the rbtree for historical reasons, + * from highest to lowest value, that is why end element is + * always visited before the start element. + */ if (nft_rbtree_interval_end(rbe)) { rbe_end = rbe; continue; } + if (!nft_set_elem_expired(&rbe->ext)) continue; - if (nft_set_elem_mark_busy(&rbe->ext)) + + nft_set_elem_dead(&rbe->ext); + + if (!rbe_end) continue; - if (rbe_prev) { - rb_erase(&rbe_prev->node, &priv->root); - rbe_prev = NULL; - } - gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); - if (!gcb) - break; + nft_set_elem_dead(&rbe_end->ext); - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, rbe); - rbe_prev = rbe; + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) + goto try_later; - if (rbe_end) { - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, rbe_end); - rb_erase(&rbe_end->node, &priv->root); - rbe_end = NULL; - } - node = rb_next(node); - if (!node) - break; + nft_trans_gc_elem_add(gc, rbe_end); + rbe_end = NULL; +dead_elem: + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) + goto try_later; + + nft_trans_gc_elem_add(gc, rbe); } - if (rbe_prev) - rb_erase(&rbe_prev->node, &priv->root); - write_seqcount_end(&priv->count); - write_unlock_bh(&priv->lock); - nft_set_gc_batch_complete(gcb); +try_later: + read_unlock_bh(&priv->lock); + if (gc) + nft_trans_gc_queue_async_done(gc); +done: queue_delayed_work(system_power_efficient_wq, &priv->gc_work, nft_set_gc_interval(set)); } @@ -464,7 +707,8 @@ static int nft_rbtree_init(const struct nft_set *set, return 0; } -static void nft_rbtree_destroy(const struct nft_set *set) +static void nft_rbtree_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; @@ -475,7 +719,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_set_elem_destroy(set, rbe, true); + nf_tables_set_elem_destroy(ctx, set, rbe); } } diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 4026ec38526f..c7b78e4ef459 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -10,7 +10,7 @@ struct nft_socket { enum nft_socket_keys key:8; union { - enum nft_registers dreg:8; + u8 dreg; }; }; @@ -119,9 +119,8 @@ static int nft_socket_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_SOCKET_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static int nft_socket_dump(struct sk_buff *skb, @@ -140,6 +139,11 @@ static int nft_socket_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 15abb0e49603..2e026f903a4c 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -186,7 +186,6 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx, break; #endif case NFPROTO_INET: - case NFPROTO_BRIDGE: err = nf_synproxy_ipv4_init(snet, ctx->net); if (err) goto nf_ct_failure; @@ -219,7 +218,6 @@ static void nft_synproxy_do_destroy(const struct nft_ctx *ctx) break; #endif case NFPROTO_INET: - case NFPROTO_BRIDGE: nf_synproxy_ipv4_fini(snet, ctx->net); nf_synproxy_ipv6_fini(snet, ctx->net); break; @@ -253,6 +251,11 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD)); } diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index b97ab1198b03..d9604a316600 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -13,9 +13,9 @@ #endif struct nft_tproxy { - enum nft_registers sreg_addr:8; - enum nft_registers sreg_port:8; - u8 family; + u8 sreg_addr; + u8 sreg_port; + u8 family; }; static void nft_tproxy_eval_v4(const struct nft_expr *expr, @@ -254,15 +254,15 @@ static int nft_tproxy_init(const struct nft_ctx *ctx, } if (tb[NFTA_TPROXY_REG_ADDR]) { - priv->sreg_addr = nft_parse_register(tb[NFTA_TPROXY_REG_ADDR]); - err = nft_validate_register_load(priv->sreg_addr, alen); + err = nft_parse_register_load(tb[NFTA_TPROXY_REG_ADDR], + &priv->sreg_addr, alen); if (err < 0) return err; } if (tb[NFTA_TPROXY_REG_PORT]) { - priv->sreg_port = nft_parse_register(tb[NFTA_TPROXY_REG_PORT]); - err = nft_validate_register_load(priv->sreg_port, sizeof(u16)); + err = nft_parse_register_load(tb[NFTA_TPROXY_REG_PORT], + &priv->sreg_port, sizeof(u16)); if (err < 0) return err; } @@ -289,6 +289,18 @@ static int nft_tproxy_dump(struct sk_buff *skb, return 0; } +static int nft_tproxy_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + + return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING); +} + static struct nft_expr_type nft_tproxy_type; static const struct nft_expr_ops nft_tproxy_ops = { .type = &nft_tproxy_type, @@ -296,6 +308,7 @@ static const struct nft_expr_ops nft_tproxy_ops = { .eval = nft_tproxy_eval, .init = nft_tproxy_init, .dump = nft_tproxy_dump, + .validate = nft_tproxy_validate, }; static struct nft_expr_type nft_tproxy_type __read_mostly = { diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 1effd4878619..b2070f9f98ff 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -14,7 +14,7 @@ struct nft_tunnel { enum nft_tunnel_keys key:8; - enum nft_registers dreg:8; + u8 dreg; enum nft_tunnel_mode mode:8; }; @@ -92,8 +92,6 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]); - if (tb[NFTA_TUNNEL_MODE]) { priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE])); if (priv->mode > NFT_TUNNEL_MODE_MAX) @@ -102,8 +100,8 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, priv->mode = NFT_TUNNEL_MODE_NONE; } - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_TUNNEL_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static int nft_tunnel_get_dump(struct sk_buff *skb, @@ -134,6 +132,7 @@ static const struct nft_expr_ops nft_tunnel_get_ops = { static struct nft_expr_type nft_tunnel_type __read_mostly = { .name = "tunnel", + .family = NFPROTO_NETDEV, .ops = &nft_tunnel_get_ops, .policy = nft_tunnel_policy, .maxattr = NFTA_TUNNEL_MAX, diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 06d5cabf1d7c..7f762fc42891 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -24,7 +24,7 @@ static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = { struct nft_xfrm { enum nft_xfrm_keys key:8; - enum nft_registers dreg:8; + u8 dreg; u8 dir; u8 spnum; }; @@ -86,9 +86,8 @@ static int nft_xfrm_get_init(const struct nft_ctx *ctx, priv->spnum = spnum; - priv->dreg = nft_parse_register(tb[NFTA_XFRM_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_XFRM_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } /* Return true if key asks for daddr/saddr and current @@ -234,6 +233,11 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e const struct nft_xfrm *priv = nft_expr_priv(expr); unsigned int hooks; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + switch (priv->dir) { case XFRM_POLICY_IN: hooks = (1 << NF_INET_FORWARD) | diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c index 353ca7801251..ff66b56a3f97 100644 --- a/net/netfilter/xt_REDIRECT.c +++ b/net/netfilter/xt_REDIRECT.c @@ -46,7 +46,6 @@ static void redirect_tg_destroy(const struct xt_tgdtor_param *par) nf_ct_netns_put(par->net, par->family); } -/* FIXME: Take multiple ranges --RR */ static int redirect_tg4_check(const struct xt_tgchk_param *par) { const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; @@ -65,7 +64,14 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par) static unsigned int redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_redirect_ipv4(skb, par->targinfo, xt_hooknum(par)); + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range2 range = { + .flags = mr->range[0].flags, + .min_proto = mr->range[0].min, + .max_proto = mr->range[0].max, + }; + + return nf_nat_redirect_ipv4(skb, &range, xt_hooknum(par)); } static struct xt_target redirect_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index e1990baf3a3b..dc9485854002 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -71,4 +71,3 @@ MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>"); MODULE_DESCRIPTION("Passive OS fingerprint matching."); MODULE_ALIAS("ipt_osf"); MODULE_ALIAS("ip6t_osf"); -MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index e85ce69924ae..50332888c8d2 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -76,18 +76,23 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) */ return false; - filp = sk->sk_socket->file; - if (filp == NULL) + read_lock_bh(&sk->sk_callback_lock); + filp = sk->sk_socket ? sk->sk_socket->file : NULL; + if (filp == NULL) { + read_unlock_bh(&sk->sk_callback_lock); return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; + } if (info->match & XT_OWNER_UID) { kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); if ((uid_gte(filp->f_cred->fsuid, uid_min) && uid_lte(filp->f_cred->fsuid, uid_max)) ^ - !(info->invert & XT_OWNER_UID)) + !(info->invert & XT_OWNER_UID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } if (info->match & XT_OWNER_GID) { @@ -112,10 +117,13 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) } } - if (match ^ !(info->invert & XT_OWNER_GID)) + if (match ^ !(info->invert & XT_OWNER_GID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } + read_unlock_bh(&sk->sk_callback_lock); return true; } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 3469b6073610..6fc0deb11aff 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -561,7 +561,7 @@ recent_mt_proc_write(struct file *file, const char __user *input, { struct recent_table *t = PDE_DATA(file_inode(file)); struct recent_entry *e; - char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; + char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:255.255.255.255")]; const char *c = buf; union nf_inet_addr addr = {}; u_int16_t family; diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 680015ba7cb6..d4bf089c9e3f 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -150,6 +150,8 @@ static int sctp_mt_check(const struct xt_mtchk_param *par) { const struct xt_sctp_info *info = par->matchinfo; + if (info->flag_count > ARRAY_SIZE(info->flag_info)) + return -EINVAL; if (info->flags & ~XT_SCTP_VALID_FLAGS) return -EINVAL; if (info->invflags & ~XT_SCTP_VALID_FLAGS) diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c index 177b40d08098..117d4615d668 100644 --- a/net/netfilter/xt_u32.c +++ b/net/netfilter/xt_u32.c @@ -96,11 +96,32 @@ static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par) return ret ^ data->invert; } +static int u32_mt_checkentry(const struct xt_mtchk_param *par) +{ + const struct xt_u32 *data = par->matchinfo; + const struct xt_u32_test *ct; + unsigned int i; + + if (data->ntests > ARRAY_SIZE(data->tests)) + return -EINVAL; + + for (i = 0; i < data->ntests; ++i) { + ct = &data->tests[i]; + + if (ct->nnums > ARRAY_SIZE(ct->location) || + ct->nvalues > ARRAY_SIZE(ct->value)) + return -EINVAL; + } + + return 0; +} + static struct xt_match xt_u32_mt_reg __read_mostly = { .name = "u32", .revision = 0, .family = NFPROTO_UNSPEC, .match = u32_mt, + .checkentry = u32_mt_checkentry, .matchsize = sizeof(struct xt_u32), .me = THIS_MODULE, }; |