From ab567a9c2c31d0db84ddf9d8731c76d4b716fb84 Mon Sep 17 00:00:00 2001 From: Zumeng Chen Date: Thu, 24 Nov 2011 16:20:45 +0800 Subject: [PATCH 28/41] Integrate network-related features from 3.0 for ar71xx, taken from the following git repo: git://nbd.name/openwrt.git 600-netfilter_layer7_2.22.patch 601-netfilter_layer7_pktmatch.patch 602-netfilter_layer7_match.patch 603-netfilter_layer7_2.6.36_fix.patch 604-netfilter_cisco_794x_iphone.patch 605-netfilter_rtsp.patch 610-netfilter_match_bypass_default_checks.patch 611-netfilter_match_bypass_default_table.patch 612-netfilter_match_reduce_memory_access.patch 613-netfilter_optional_tcp_window_check.patch 620-sched_esfq.patch 621-sched_act_connmark.patch 630-packet_socket_type.patch 640-bridge_no_eap_forward.patch 641-bridge_always_accept_eap.patch 642-bridge_port_isolate.patch 643-bridge_remove_ipv6_dependency.patch 650-pppoe_header_pad.patch 651-wireless_mesh_header.patch 652-atm_header_changes.patch 700-swconfig.patch 701-phy_extension.patch 702-phy_add_aneg_done_function.patch 720-phy_adm6996.patch 721-phy_packets.patch 722-phy_mvswitch.patch 723-phy_ip175c.patch 724-phy_ar8216.patch 725-phy_rtl8306.patch 726-phy_rtl8366.patch 750-hostap_txpower.patch Integrated-by: Zumeng Chen --- drivers/net/phy/Kconfig | 55 ++- drivers/net/phy/Makefile | 9 + drivers/net/phy/phy.c | 49 ++- drivers/net/phy/phy_device.c | 17 + drivers/net/phy/swconfig.c | 956 +++++++++++++++++++++ drivers/net/pppoe.c | 4 +- drivers/net/wireless/hostap/hostap.h | 1 + drivers/net/wireless/hostap/hostap_ap.c | 24 +- drivers/net/wireless/hostap/hostap_config.h | 5 + drivers/net/wireless/hostap/hostap_hw.c | 1 + drivers/net/wireless/hostap/hostap_info.c | 5 + drivers/net/wireless/hostap/hostap_ioctl.c | 43 +- include/linux/ath5k_platform.h | 30 + include/linux/ath9k_platform.h | 21 +- include/linux/atm.h | 3 + include/linux/if_packet.h | 3 + include/linux/netdevice.h | 3 +- include/linux/netfilter/nf_conntrack_rtsp.h | 63 ++ 
include/linux/netfilter/nf_conntrack_sip.h | 3 + include/linux/netfilter/xt_layer7.h | 14 + include/linux/netfilter_helpers.h | 133 +++ include/linux/netfilter_ipv4/ip_tables.h | 1 + include/linux/netfilter_mime.h | 89 ++ include/linux/phy.h | 24 + include/linux/pkt_sched.h | 29 + include/linux/switch.h | 11 +- include/net/addrconf.h | 6 + include/net/netfilter/nf_conntrack.h | 16 + net/bridge/Kconfig | 1 - net/bridge/br_forward.c | 12 +- net/bridge/br_input.c | 11 +- net/bridge/br_private.h | 1 + net/bridge/br_sysfs_if.c | 17 + net/ipv4/netfilter/Kconfig | 5 + net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ip_tables.c | 103 ++- net/ipv4/netfilter/nf_nat_rtsp.c | 496 +++++++++++ net/ipv4/netfilter/nf_nat_sip.c | 26 +- net/ipv6/Makefile | 1 + net/ipv6/addrconf.c | 9 +- net/ipv6/inet6_stubs.c | 27 + net/netfilter/Kconfig | 37 + net/netfilter/Makefile | 2 + net/netfilter/nf_conntrack_core.c | 8 + net/netfilter/nf_conntrack_proto_tcp.c | 13 + net/netfilter/nf_conntrack_rtsp.c | 517 ++++++++++++ net/netfilter/nf_conntrack_sip.c | 17 + net/netfilter/nf_conntrack_standalone.c | 6 + net/netfilter/regexp/regexp.c | 1197 +++++++++++++++++++++++++++ net/netfilter/regexp/regexp.h | 41 + net/netfilter/regexp/regmagic.h | 5 + net/netfilter/regexp/regsub.c | 95 +++ net/netfilter/xt_layer7.c | 700 ++++++++++++++++ net/packet/af_packet.c | 35 +- net/sched/Kconfig | 44 + net/sched/Makefile | 2 + net/sched/act_connmark.c | 137 +++ net/sched/sch_esfq.c | 702 ++++++++++++++++ 58 files changed, 5817 insertions(+), 69 deletions(-) create mode 100644 drivers/net/phy/swconfig.c create mode 100644 include/linux/ath5k_platform.h create mode 100644 include/linux/netfilter/nf_conntrack_rtsp.h create mode 100644 include/linux/netfilter/xt_layer7.h create mode 100644 include/linux/netfilter_helpers.h create mode 100644 include/linux/netfilter_mime.h create mode 100644 net/ipv4/netfilter/nf_nat_rtsp.c create mode 100644 net/ipv6/inet6_stubs.c create mode 100644 
net/netfilter/nf_conntrack_rtsp.c create mode 100644 net/netfilter/regexp/regexp.c create mode 100644 net/netfilter/regexp/regexp.h create mode 100644 net/netfilter/regexp/regmagic.h create mode 100644 net/netfilter/regexp/regsub.c create mode 100644 net/netfilter/xt_layer7.c create mode 100644 net/sched/act_connmark.c create mode 100644 net/sched/sch_esfq.c diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 745c5a9..921c563 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -13,6 +13,12 @@ menuconfig PHYLIB if PHYLIB +config SWCONFIG + tristate "Switch configuration API" + ---help--- + Switch configuration API using netlink. This allows + you to configure the VLAN features of certain switches. + comment "MII PHY device drivers" config MARVELL_PHY @@ -90,7 +96,29 @@ config LSI_ET1011C_PHY config MICREL_PHY tristate "Driver for Micrel PHYs" ---help--- - Supports the KSZ8041, KSZ9021, VSC8201, KS8001 PHYs. + Supports the KSZ9021, VSC8201, KS8001, and KSZ8041 PHYs. + +config ADM6996_PHY + tristate "Driver for ADM6996 switches" + select SWCONFIG + ---help--- + Currently supports the ADM6996FC and ADM6996M switches. + Support for FC is very limited. + +config MVSWITCH_PHY + tristate "Driver for Marvell 88E6060 switches" + +config IP17XX_PHY + tristate "Driver for IC+ IP17xx switches" + select SWCONFIG + +config AR8216_PHY + tristate "Driver for Atheros AR8216 switches" + select SWCONFIG + +config RTL8306_PHY + tristate "Driver for Realtek RTL8306S switches" + select SWCONFIG config FIXED_PHY bool "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs" @@ -130,4 +158,29 @@ config MDIO_OCTEON If in doubt, say Y. +config RTL8366_SMI + tristate "Driver for the RTL8366 SMI interface" + depends on GENERIC_GPIO + ---help--- + This module implements the SMI interface protocol which is used + by some RTL8366 ethernet switch devices via the generic GPIO API. 
+ +if RTL8366_SMI + +config RTL8366S_PHY + tristate "Driver for the Realtek RTL8366S switch" + select SWCONFIG + +config RTL8366RB_PHY + tristate "Driver for the Realtek RTL8366RB switch" + select SWCONFIG + +config RTL8366S_PHY_DEBUG_FS + bool "RTL8366 switch driver DEBUG_FS support" + depends on RTL8366S_PHY || RTL8366RB_PHY + depends on DEBUG_FS + default n + +endif # RTL8366_SMI + endif # PHYLIB diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 2333215..6da2c32 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -3,6 +3,7 @@ libphy-objs := phy.o phy_device.o mdio_bus.o obj-$(CONFIG_PHYLIB) += libphy.o +obj-$(CONFIG_SWCONFIG) += swconfig.o obj-$(CONFIG_MARVELL_PHY) += marvell.o obj-$(CONFIG_DAVICOM_PHY) += davicom.o obj-$(CONFIG_CICADA_PHY) += cicada.o @@ -13,7 +14,15 @@ obj-$(CONFIG_VITESSE_PHY) += vitesse.o obj-$(CONFIG_BROADCOM_PHY) += broadcom.o obj-$(CONFIG_BCM63XX_PHY) += bcm63xx.o obj-$(CONFIG_ICPLUS_PHY) += icplus.o +obj-$(CONFIG_ADM6996_PHY) += adm6996.o +obj-$(CONFIG_MVSWITCH_PHY) += mvswitch.o +obj-$(CONFIG_IP17XX_PHY) += ip17xx.o obj-$(CONFIG_REALTEK_PHY) += realtek.o +obj-$(CONFIG_AR8216_PHY) += ar8216.o +obj-$(CONFIG_RTL8306_PHY) += rtl8306.o +obj-$(CONFIG_RTL8366_SMI) += rtl8366_smi.o +obj-$(CONFIG_RTL8366S_PHY) += rtl8366s.o +obj-$(CONFIG_RTL8366RB_PHY) += rtl8366rb.o obj-$(CONFIG_LSI_ET1011C_PHY) += et1011c.o obj-$(CONFIG_FIXED_PHY) += fixed.o obj-$(CONFIG_MDIO_BITBANG) += mdio-bitbang.o diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a475957..e01ee2b 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -106,6 +106,9 @@ static inline int phy_aneg_done(struct phy_device *phydev) { int retval; + if (phydev->drv->aneg_done) + return phydev->drv->aneg_done(phydev); + retval = phy_read(phydev, MII_BMSR); return (retval < 0) ? 
retval : (retval & BMSR_ANEGCOMPLETE); @@ -299,6 +302,50 @@ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) } EXPORT_SYMBOL(phy_ethtool_gset); +int phy_ethtool_ioctl(struct phy_device *phydev, void *useraddr) +{ + u32 cmd; + int tmp; + struct ethtool_cmd ecmd = { ETHTOOL_GSET }; + struct ethtool_value edata = { ETHTOOL_GLINK }; + + if (get_user(cmd, (u32 *) useraddr)) + return -EFAULT; + + switch (cmd) { + case ETHTOOL_GSET: + phy_ethtool_gset(phydev, &ecmd); + if (copy_to_user(useraddr, &ecmd, sizeof(ecmd))) + return -EFAULT; + return 0; + + case ETHTOOL_SSET: + if (copy_from_user(&ecmd, useraddr, sizeof(ecmd))) + return -EFAULT; + return phy_ethtool_sset(phydev, &ecmd); + + case ETHTOOL_NWAY_RST: + /* if autoneg is off, it's an error */ + tmp = phy_read(phydev, MII_BMCR); + if (tmp & BMCR_ANENABLE) { + tmp |= (BMCR_ANRESTART); + phy_write(phydev, MII_BMCR, tmp); + return 0; + } + return -EINVAL; + + case ETHTOOL_GLINK: + edata.data = (phy_read(phydev, + MII_BMSR) & BMSR_LSTATUS) ? 
1 : 0; + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; + } + + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(phy_ethtool_ioctl); + /** * phy_mii_ioctl - generic PHY MII ioctl interface * @phydev: the phy_device struct @@ -474,7 +521,7 @@ static void phy_force_reduction(struct phy_device *phydev) int idx; idx = phy_find_setting(phydev->speed, phydev->duplex); - + idx++; idx = phy_find_valid(idx, phydev->supported); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index ff109fe..3df2db0 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -149,6 +149,18 @@ int phy_scan_fixups(struct phy_device *phydev) } EXPORT_SYMBOL(phy_scan_fixups); +static int generic_receive_skb(struct sk_buff *skb) +{ + skb->protocol = eth_type_trans(skb, skb->dev); + return netif_receive_skb(skb); +} + +static int generic_rx(struct sk_buff *skb) +{ + skb->protocol = eth_type_trans(skb, skb->dev); + return netif_rx(skb); +} + static struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id) { @@ -180,6 +192,8 @@ static struct phy_device* phy_device_create(struct mii_bus *bus, dev_set_name(&dev->dev, PHY_ID_FMT, bus->id, addr); dev->state = PHY_DOWN; + dev->netif_receive_skb = &generic_receive_skb; + dev->netif_rx = &generic_rx; mutex_init(&dev->lock); INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine); @@ -719,6 +733,9 @@ int genphy_update_link(struct phy_device *phydev) { int status; + if (phydev->drv->update_link) + return phydev->drv->update_link(phydev); + /* Do a fake read */ status = phy_read(phydev, MII_BMSR); diff --git a/drivers/net/phy/swconfig.c b/drivers/net/phy/swconfig.c new file mode 100644 index 0000000..2038330 --- /dev/null +++ b/drivers/net/phy/swconfig.c @@ -0,0 +1,956 @@ +/* + * swconfig.c: Switch configuration API + * + * Copyright (C) 2008 Felix Fietkau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General 
Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//#define DEBUG 1 +#ifdef DEBUG +#define DPRINTF(format, ...) printk("%s: " format, __func__, ##__VA_ARGS__) +#else +#define DPRINTF(...) do {} while(0) +#endif + +#define SWCONFIG_DEVNAME "switch%d" + +MODULE_AUTHOR("Felix Fietkau "); +MODULE_LICENSE("GPL"); + +static int swdev_id = 0; +static struct list_head swdevs; +static DEFINE_SPINLOCK(swdevs_lock); +struct swconfig_callback; + +struct swconfig_callback +{ + struct sk_buff *msg; + struct genlmsghdr *hdr; + struct genl_info *info; + int cmd; + + /* callback for filling in the message data */ + int (*fill)(struct swconfig_callback *cb, void *arg); + + /* callback for closing the message before sending it */ + int (*close)(struct swconfig_callback *cb, void *arg); + + struct nlattr *nest[4]; + int args[4]; +}; + +/* defaults */ + +static int +swconfig_get_vlan_ports(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val) +{ + int ret; + if (val->port_vlan >= dev->vlans) + return -EINVAL; + + if (!dev->ops->get_vlan_ports) + return -EOPNOTSUPP; + + ret = dev->ops->get_vlan_ports(dev, val); + return ret; +} + +static int +swconfig_set_vlan_ports(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val) +{ + struct switch_port *ports = val->value.ports; + const struct switch_dev_ops *ops = dev->ops; + int i; + + if (val->port_vlan >= dev->vlans) + return -EINVAL; + + /* validate ports */ + if (val->len > dev->ports) + return -EINVAL; + + if (!ops->set_vlan_ports) + return -EOPNOTSUPP; 
+ + for (i = 0; i < val->len; i++) { + if (ports[i].id >= dev->ports) + return -EINVAL; + + if (ops->set_port_pvid && + !(ports[i].flags & (1 << SWITCH_PORT_FLAG_TAGGED))) + ops->set_port_pvid(dev, ports[i].id, val->port_vlan); + } + + return ops->set_vlan_ports(dev, val); +} + +static int +swconfig_set_pvid(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val) +{ + if (val->port_vlan >= dev->ports) + return -EINVAL; + + if (!dev->ops->set_port_pvid) + return -EOPNOTSUPP; + + return dev->ops->set_port_pvid(dev, val->port_vlan, val->value.i); +} + +static int +swconfig_get_pvid(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val) +{ + if (val->port_vlan >= dev->ports) + return -EINVAL; + + if (!dev->ops->get_port_pvid) + return -EOPNOTSUPP; + + return dev->ops->get_port_pvid(dev, val->port_vlan, &val->value.i); +} + +static int +swconfig_apply_config(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val) +{ + /* don't complain if not supported by the switch driver */ + if (!dev->ops->apply_config) + return 0; + + return dev->ops->apply_config(dev); +} + +static int +swconfig_reset_switch(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val) +{ + /* don't complain if not supported by the switch driver */ + if (!dev->ops->reset_switch) + return 0; + + return dev->ops->reset_switch(dev); +} + +enum global_defaults { + GLOBAL_APPLY, + GLOBAL_RESET, +}; + +enum vlan_defaults { + VLAN_PORTS, +}; + +enum port_defaults { + PORT_PVID, +}; + +static struct switch_attr default_global[] = { + [GLOBAL_APPLY] = { + .type = SWITCH_TYPE_NOVAL, + .name = "apply", + .description = "Activate changes in the hardware", + .set = swconfig_apply_config, + }, + [GLOBAL_RESET] = { + .type = SWITCH_TYPE_NOVAL, + .name = "reset", + .description = "Reset the switch", + .set = swconfig_reset_switch, + } +}; + +static struct switch_attr default_port[] = { + [PORT_PVID] = { + .type = 
SWITCH_TYPE_INT, + .name = "pvid", + .description = "Primary VLAN ID", + .set = swconfig_set_pvid, + .get = swconfig_get_pvid, + } +}; + +static struct switch_attr default_vlan[] = { + [VLAN_PORTS] = { + .type = SWITCH_TYPE_PORTS, + .name = "ports", + .description = "VLAN port mapping", + .set = swconfig_set_vlan_ports, + .get = swconfig_get_vlan_ports, + }, +}; + + +static void swconfig_defaults_init(struct switch_dev *dev) +{ + const struct switch_dev_ops *ops = dev->ops; + + dev->def_global = 0; + dev->def_vlan = 0; + dev->def_port = 0; + + if (ops->get_vlan_ports || ops->set_vlan_ports) + set_bit(VLAN_PORTS, &dev->def_vlan); + + if (ops->get_port_pvid || ops->set_port_pvid) + set_bit(PORT_PVID, &dev->def_port); + + /* always present, can be no-op */ + set_bit(GLOBAL_APPLY, &dev->def_global); + set_bit(GLOBAL_RESET, &dev->def_global); +} + + +static struct genl_family switch_fam = { + .id = GENL_ID_GENERATE, + .name = "switch", + .hdrsize = 0, + .version = 1, + .maxattr = SWITCH_ATTR_MAX, +}; + +static const struct nla_policy switch_policy[SWITCH_ATTR_MAX+1] = { + [SWITCH_ATTR_ID] = { .type = NLA_U32 }, + [SWITCH_ATTR_OP_ID] = { .type = NLA_U32 }, + [SWITCH_ATTR_OP_PORT] = { .type = NLA_U32 }, + [SWITCH_ATTR_OP_VLAN] = { .type = NLA_U32 }, + [SWITCH_ATTR_OP_VALUE_INT] = { .type = NLA_U32 }, + [SWITCH_ATTR_OP_VALUE_STR] = { .type = NLA_NUL_STRING }, + [SWITCH_ATTR_OP_VALUE_PORTS] = { .type = NLA_NESTED }, + [SWITCH_ATTR_TYPE] = { .type = NLA_U32 }, +}; + +static const struct nla_policy port_policy[SWITCH_PORT_ATTR_MAX+1] = { + [SWITCH_PORT_ID] = { .type = NLA_U32 }, + [SWITCH_PORT_FLAG_TAGGED] = { .type = NLA_FLAG }, +}; + +static inline void +swconfig_lock(void) +{ + spin_lock(&swdevs_lock); +} + +static inline void +swconfig_unlock(void) +{ + spin_unlock(&swdevs_lock); +} + +static struct switch_dev * +swconfig_get_dev(struct genl_info *info) +{ + struct switch_dev *dev = NULL; + struct switch_dev *p; + int id; + + if (!info->attrs[SWITCH_ATTR_ID]) + goto done; 
+ + id = nla_get_u32(info->attrs[SWITCH_ATTR_ID]); + swconfig_lock(); + list_for_each_entry(p, &swdevs, dev_list) { + if (id != p->id) + continue; + + dev = p; + break; + } + if (dev) + spin_lock(&dev->lock); + else + DPRINTF("device %d not found\n", id); + swconfig_unlock(); +done: + return dev; +} + +static inline void +swconfig_put_dev(struct switch_dev *dev) +{ + spin_unlock(&dev->lock); +} + +static int +swconfig_dump_attr(struct swconfig_callback *cb, void *arg) +{ + struct switch_attr *op = arg; + struct genl_info *info = cb->info; + struct sk_buff *msg = cb->msg; + int id = cb->args[0]; + void *hdr; + + hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &switch_fam, + NLM_F_MULTI, SWITCH_CMD_NEW_ATTR); + if (IS_ERR(hdr)) + return -1; + + NLA_PUT_U32(msg, SWITCH_ATTR_OP_ID, id); + NLA_PUT_U32(msg, SWITCH_ATTR_OP_TYPE, op->type); + NLA_PUT_STRING(msg, SWITCH_ATTR_OP_NAME, op->name); + if (op->description) + NLA_PUT_STRING(msg, SWITCH_ATTR_OP_DESCRIPTION, + op->description); + + return genlmsg_end(msg, hdr); +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/* spread multipart messages across multiple message buffers */ +static int +swconfig_send_multipart(struct swconfig_callback *cb, void *arg) +{ + struct genl_info *info = cb->info; + int restart = 0; + int err; + + do { + if (!cb->msg) { + cb->msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (cb->msg == NULL) + goto error; + } + + if (!(cb->fill(cb, arg) < 0)) + break; + + /* fill failed, check if this was already the second attempt */ + if (restart) + goto error; + + /* try again in a new message, send the current one */ + restart = 1; + if (cb->close) { + if (cb->close(cb, arg) < 0) + goto error; + } + err = genlmsg_reply(cb->msg, info); + cb->msg = NULL; + if (err < 0) + goto error; + + } while (restart); + + return 0; + +error: + if (cb->msg) + nlmsg_free(cb->msg); + return -1; +} + +static int +swconfig_list_attrs(struct sk_buff *skb, struct genl_info *info) +{ + struct 
genlmsghdr *hdr = nlmsg_data(info->nlhdr); + const struct switch_attrlist *alist; + struct switch_dev *dev; + struct swconfig_callback cb; + int err = -EINVAL; + int i; + + /* defaults */ + struct switch_attr *def_list; + unsigned long *def_active; + int n_def; + + dev = swconfig_get_dev(info); + if (!dev) + return -EINVAL; + + switch(hdr->cmd) { + case SWITCH_CMD_LIST_GLOBAL: + alist = &dev->ops->attr_global; + def_list = default_global; + def_active = &dev->def_global; + n_def = ARRAY_SIZE(default_global); + break; + case SWITCH_CMD_LIST_VLAN: + alist = &dev->ops->attr_vlan; + def_list = default_vlan; + def_active = &dev->def_vlan; + n_def = ARRAY_SIZE(default_vlan); + break; + case SWITCH_CMD_LIST_PORT: + alist = &dev->ops->attr_port; + def_list = default_port; + def_active = &dev->def_port; + n_def = ARRAY_SIZE(default_port); + break; + default: + WARN_ON(1); + goto out; + } + + memset(&cb, 0, sizeof(cb)); + cb.info = info; + cb.fill = swconfig_dump_attr; + for (i = 0; i < alist->n_attr; i++) { + if (alist->attr[i].disabled) + continue; + cb.args[0] = i; + err = swconfig_send_multipart(&cb, (void *) &alist->attr[i]); + if (err < 0) + goto error; + } + + /* defaults */ + for (i = 0; i < n_def; i++) { + if (!test_bit(i, def_active)) + continue; + cb.args[0] = SWITCH_ATTR_DEFAULTS_OFFSET + i; + err = swconfig_send_multipart(&cb, (void *) &def_list[i]); + if (err < 0) + goto error; + } + swconfig_put_dev(dev); + + if (!cb.msg) + return 0; + + return genlmsg_reply(cb.msg, info); + +error: + if (cb.msg) + nlmsg_free(cb.msg); +out: + swconfig_put_dev(dev); + return err; +} + +static const struct switch_attr * +swconfig_lookup_attr(struct switch_dev *dev, struct genl_info *info, + struct switch_val *val) +{ + struct genlmsghdr *hdr = nlmsg_data(info->nlhdr); + const struct switch_attrlist *alist; + const struct switch_attr *attr = NULL; + int attr_id; + + /* defaults */ + struct switch_attr *def_list; + unsigned long *def_active; + int n_def; + + if 
(!info->attrs[SWITCH_ATTR_OP_ID]) + goto done; + + switch(hdr->cmd) { + case SWITCH_CMD_SET_GLOBAL: + case SWITCH_CMD_GET_GLOBAL: + alist = &dev->ops->attr_global; + def_list = default_global; + def_active = &dev->def_global; + n_def = ARRAY_SIZE(default_global); + break; + case SWITCH_CMD_SET_VLAN: + case SWITCH_CMD_GET_VLAN: + alist = &dev->ops->attr_vlan; + def_list = default_vlan; + def_active = &dev->def_vlan; + n_def = ARRAY_SIZE(default_vlan); + if (!info->attrs[SWITCH_ATTR_OP_VLAN]) + goto done; + val->port_vlan = nla_get_u32(info->attrs[SWITCH_ATTR_OP_VLAN]); + if (val->port_vlan >= dev->vlans) + goto done; + break; + case SWITCH_CMD_SET_PORT: + case SWITCH_CMD_GET_PORT: + alist = &dev->ops->attr_port; + def_list = default_port; + def_active = &dev->def_port; + n_def = ARRAY_SIZE(default_port); + if (!info->attrs[SWITCH_ATTR_OP_PORT]) + goto done; + val->port_vlan = nla_get_u32(info->attrs[SWITCH_ATTR_OP_PORT]); + if (val->port_vlan >= dev->ports) + goto done; + break; + default: + WARN_ON(1); + goto done; + } + + if (!alist) + goto done; + + attr_id = nla_get_u32(info->attrs[SWITCH_ATTR_OP_ID]); + if (attr_id >= SWITCH_ATTR_DEFAULTS_OFFSET) { + attr_id -= SWITCH_ATTR_DEFAULTS_OFFSET; + if (attr_id >= n_def) + goto done; + if (!test_bit(attr_id, def_active)) + goto done; + attr = &def_list[attr_id]; + } else { + if (attr_id >= alist->n_attr) + goto done; + attr = &alist->attr[attr_id]; + } + + if (attr->disabled) + attr = NULL; + +done: + if (!attr) + DPRINTF("attribute lookup failed\n"); + val->attr = attr; + return attr; +} + +static int +swconfig_parse_ports(struct sk_buff *msg, struct nlattr *head, + struct switch_val *val, int max) +{ + struct nlattr *nla; + int rem; + + val->len = 0; + nla_for_each_nested(nla, head, rem) { + struct nlattr *tb[SWITCH_PORT_ATTR_MAX+1]; + struct switch_port *port = &val->value.ports[val->len]; + + if (val->len >= max) + return -EINVAL; + + if (nla_parse_nested(tb, SWITCH_PORT_ATTR_MAX, nla, + port_policy)) + return 
-EINVAL; + + if (!tb[SWITCH_PORT_ID]) + return -EINVAL; + + port->id = nla_get_u32(tb[SWITCH_PORT_ID]); + if (tb[SWITCH_PORT_FLAG_TAGGED]) + port->flags |= (1 << SWITCH_PORT_FLAG_TAGGED); + val->len++; + } + + return 0; +} + +static int +swconfig_set_attr(struct sk_buff *skb, struct genl_info *info) +{ + const struct switch_attr *attr; + struct switch_dev *dev; + struct switch_val val; + int err = -EINVAL; + + dev = swconfig_get_dev(info); + if (!dev) + return -EINVAL; + + memset(&val, 0, sizeof(val)); + attr = swconfig_lookup_attr(dev, info, &val); + if (!attr || !attr->set) + goto error; + + val.attr = attr; + switch(attr->type) { + case SWITCH_TYPE_NOVAL: + break; + case SWITCH_TYPE_INT: + if (!info->attrs[SWITCH_ATTR_OP_VALUE_INT]) + goto error; + val.value.i = + nla_get_u32(info->attrs[SWITCH_ATTR_OP_VALUE_INT]); + break; + case SWITCH_TYPE_STRING: + if (!info->attrs[SWITCH_ATTR_OP_VALUE_STR]) + goto error; + val.value.s = + nla_data(info->attrs[SWITCH_ATTR_OP_VALUE_STR]); + break; + case SWITCH_TYPE_PORTS: + val.value.ports = dev->portbuf; + memset(dev->portbuf, 0, + sizeof(struct switch_port) * dev->ports); + + /* TODO: implement multipart? 
*/ + if (info->attrs[SWITCH_ATTR_OP_VALUE_PORTS]) { + err = swconfig_parse_ports(skb, + info->attrs[SWITCH_ATTR_OP_VALUE_PORTS], &val, dev->ports); + if (err < 0) + goto error; + } else { + val.len = 0; + err = 0; + } + break; + default: + goto error; + } + + err = attr->set(dev, attr, &val); +error: + swconfig_put_dev(dev); + return err; +} + +static int +swconfig_close_portlist(struct swconfig_callback *cb, void *arg) +{ + if (cb->nest[0]) + nla_nest_end(cb->msg, cb->nest[0]); + return 0; +} + +static int +swconfig_send_port(struct swconfig_callback *cb, void *arg) +{ + const struct switch_port *port = arg; + struct nlattr *p = NULL; + + if (!cb->nest[0]) { + cb->nest[0] = nla_nest_start(cb->msg, cb->cmd); + if (!cb->nest[0]) + return -1; + } + + p = nla_nest_start(cb->msg, SWITCH_ATTR_PORT); + if (!p) + goto error; + + NLA_PUT_U32(cb->msg, SWITCH_PORT_ID, port->id); + if (port->flags & (1 << SWITCH_PORT_FLAG_TAGGED)) + NLA_PUT_FLAG(cb->msg, SWITCH_PORT_FLAG_TAGGED); + + nla_nest_end(cb->msg, p); + return 0; + +nla_put_failure: + nla_nest_cancel(cb->msg, p); +error: + nla_nest_cancel(cb->msg, cb->nest[0]); + return -1; +} + +static int +swconfig_send_ports(struct sk_buff **msg, struct genl_info *info, int attr, + const struct switch_val *val) +{ + struct swconfig_callback cb; + int err = 0; + int i; + + if (!val->value.ports) + return -EINVAL; + + memset(&cb, 0, sizeof(cb)); + cb.cmd = attr; + cb.msg = *msg; + cb.info = info; + cb.fill = swconfig_send_port; + cb.close = swconfig_close_portlist; + + cb.nest[0] = nla_nest_start(cb.msg, cb.cmd); + for (i = 0; i < val->len; i++) { + err = swconfig_send_multipart(&cb, &val->value.ports[i]); + if (err) + goto done; + } + err = val->len; + swconfig_close_portlist(&cb, NULL); + *msg = cb.msg; + +done: + return err; +} + +static int +swconfig_get_attr(struct sk_buff *skb, struct genl_info *info) +{ + struct genlmsghdr *hdr = nlmsg_data(info->nlhdr); + const struct switch_attr *attr; + struct switch_dev *dev; + struct 
sk_buff *msg = NULL; + struct switch_val val; + int err = -EINVAL; + int cmd = hdr->cmd; + + dev = swconfig_get_dev(info); + if (!dev) + return -EINVAL; + + memset(&val, 0, sizeof(val)); + attr = swconfig_lookup_attr(dev, info, &val); + if (!attr || !attr->get) + goto error; + + if (attr->type == SWITCH_TYPE_PORTS) { + val.value.ports = dev->portbuf; + memset(dev->portbuf, 0, + sizeof(struct switch_port) * dev->ports); + } + + err = attr->get(dev, attr, &val); + if (err) + goto error; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + goto error; + + hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &switch_fam, + 0, cmd); + if (IS_ERR(hdr)) + goto nla_put_failure; + + switch(attr->type) { + case SWITCH_TYPE_INT: + NLA_PUT_U32(msg, SWITCH_ATTR_OP_VALUE_INT, val.value.i); + break; + case SWITCH_TYPE_STRING: + NLA_PUT_STRING(msg, SWITCH_ATTR_OP_VALUE_STR, val.value.s); + break; + case SWITCH_TYPE_PORTS: + err = swconfig_send_ports(&msg, info, + SWITCH_ATTR_OP_VALUE_PORTS, &val); + if (err < 0) + goto nla_put_failure; + break; + default: + DPRINTF("invalid type in attribute\n"); + err = -EINVAL; + goto error; + } + err = genlmsg_end(msg, hdr); + if (err < 0) + goto nla_put_failure; + + swconfig_put_dev(dev); + return genlmsg_reply(msg, info); + +nla_put_failure: + if (msg) + nlmsg_free(msg); +error: + swconfig_put_dev(dev); + if (!err) + err = -ENOMEM; + return err; +} + +static int +swconfig_send_switch(struct sk_buff *msg, u32 pid, u32 seq, int flags, + const struct switch_dev *dev) +{ + void *hdr; + + hdr = genlmsg_put(msg, pid, seq, &switch_fam, flags, + SWITCH_CMD_NEW_ATTR); + if (IS_ERR(hdr)) + return -1; + + NLA_PUT_U32(msg, SWITCH_ATTR_ID, dev->id); + NLA_PUT_STRING(msg, SWITCH_ATTR_DEV_NAME, dev->devname); + NLA_PUT_STRING(msg, SWITCH_ATTR_ALIAS, dev->alias); + NLA_PUT_STRING(msg, SWITCH_ATTR_NAME, dev->name); + NLA_PUT_U32(msg, SWITCH_ATTR_VLANS, dev->vlans); + NLA_PUT_U32(msg, SWITCH_ATTR_PORTS, dev->ports); + NLA_PUT_U32(msg, 
SWITCH_ATTR_CPU_PORT, dev->cpu_port); + + return genlmsg_end(msg, hdr); +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int swconfig_dump_switches(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct switch_dev *dev; + int start = cb->args[0]; + int idx = 0; + + swconfig_lock(); + list_for_each_entry(dev, &swdevs, dev_list) { + if (++idx <= start) + continue; + if (swconfig_send_switch(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + dev) < 0) + break; + } + swconfig_unlock(); + cb->args[0] = idx; + + return skb->len; +} + +static int +swconfig_done(struct netlink_callback *cb) +{ + return 0; +} + +static struct genl_ops swconfig_ops[] = { + { + .cmd = SWITCH_CMD_LIST_GLOBAL, + .doit = swconfig_list_attrs, + .policy = switch_policy, + }, + { + .cmd = SWITCH_CMD_LIST_VLAN, + .doit = swconfig_list_attrs, + .policy = switch_policy, + }, + { + .cmd = SWITCH_CMD_LIST_PORT, + .doit = swconfig_list_attrs, + .policy = switch_policy, + }, + { + .cmd = SWITCH_CMD_GET_GLOBAL, + .doit = swconfig_get_attr, + .policy = switch_policy, + }, + { + .cmd = SWITCH_CMD_GET_VLAN, + .doit = swconfig_get_attr, + .policy = switch_policy, + }, + { + .cmd = SWITCH_CMD_GET_PORT, + .doit = swconfig_get_attr, + .policy = switch_policy, + }, + { + .cmd = SWITCH_CMD_SET_GLOBAL, + .doit = swconfig_set_attr, + .policy = switch_policy, + }, + { + .cmd = SWITCH_CMD_SET_VLAN, + .doit = swconfig_set_attr, + .policy = switch_policy, + }, + { + .cmd = SWITCH_CMD_SET_PORT, + .doit = swconfig_set_attr, + .policy = switch_policy, + }, + { + .cmd = SWITCH_CMD_GET_SWITCH, + .dumpit = swconfig_dump_switches, + .policy = switch_policy, + .done = swconfig_done, + } +}; + +int +register_switch(struct switch_dev *dev, struct net_device *netdev) +{ + struct switch_dev *sdev; + const int max_switches = 8 * sizeof(unsigned long); + unsigned long in_use = 0; + int i; + + INIT_LIST_HEAD(&dev->dev_list); + if (netdev) { + dev->netdev = netdev; + if 
(!dev->alias) + dev->alias = netdev->name; + } + BUG_ON(!dev->alias); + + if (dev->ports > 0) { + dev->portbuf = kzalloc(sizeof(struct switch_port) * dev->ports, + GFP_KERNEL); + if (!dev->portbuf) + return -ENOMEM; + } + swconfig_defaults_init(dev); + spin_lock_init(&dev->lock); + swconfig_lock(); + dev->id = ++swdev_id; + + list_for_each_entry(sdev, &swdevs, dev_list) { + if (!sscanf(sdev->devname, SWCONFIG_DEVNAME, &i)) + continue; + if (i < 0 || i > max_switches) + continue; + + set_bit(i, &in_use); + } + i = find_first_zero_bit(&in_use, max_switches); + + if (i == max_switches) { + swconfig_unlock(); + return -ENFILE; + } + + /* fill device name */ + snprintf(dev->devname, IFNAMSIZ, SWCONFIG_DEVNAME, i); + + list_add(&dev->dev_list, &swdevs); + swconfig_unlock(); + + return 0; +} +EXPORT_SYMBOL_GPL(register_switch); + +void +unregister_switch(struct switch_dev *dev) +{ + kfree(dev->portbuf); + spin_lock(&dev->lock); + swconfig_lock(); + list_del(&dev->dev_list); + swconfig_unlock(); + spin_unlock(&dev->lock); +} +EXPORT_SYMBOL_GPL(unregister_switch); + + +static int __init +swconfig_init(void) +{ + int i, err; + + INIT_LIST_HEAD(&swdevs); + err = genl_register_family(&switch_fam); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(swconfig_ops); i++) { + err = genl_register_ops(&switch_fam, &swconfig_ops[i]); + if (err) + goto unregister; + } + + return 0; + +unregister: + genl_unregister_family(&switch_fam); + return err; +} + +static void __exit +swconfig_exit(void) +{ + genl_unregister_family(&switch_fam); +} + +module_init(swconfig_init); +module_exit(swconfig_exit); + diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index bc9a4bb..49c6da9 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -856,7 +856,7 @@ static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock, goto end; - skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32, + skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32 + NET_SKB_PAD, 0, 
GFP_KERNEL); if (!skb) { error = -ENOMEM; @@ -864,7 +864,7 @@ static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock, } /* Reserve space for headers. */ - skb_reserve(skb, dev->hard_header_len); + skb_reserve(skb, dev->hard_header_len + NET_SKB_PAD); skb_reset_network_header(skb); skb->dev = dev; diff --git a/drivers/net/wireless/hostap/hostap.h b/drivers/net/wireless/hostap/hostap.h index ce8721f..c638a83 100644 --- a/drivers/net/wireless/hostap/hostap.h +++ b/drivers/net/wireless/hostap/hostap.h @@ -90,6 +90,7 @@ extern const struct iw_handler_def hostap_iw_handler_def; extern const struct ethtool_ops prism2_ethtool_ops; int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +int hostap_restore_power(struct net_device *dev); #endif /* HOSTAP_H */ diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c index 3d05dc1..1529b35 100644 --- a/drivers/net/wireless/hostap/hostap_ap.c +++ b/drivers/net/wireless/hostap/hostap_ap.c @@ -2338,13 +2338,13 @@ int prism2_ap_get_sta_qual(local_info_t *local, struct sockaddr addr[], addr[count].sa_family = ARPHRD_ETHER; memcpy(addr[count].sa_data, sta->addr, ETH_ALEN); if (sta->last_rx_silence == 0) - qual[count].qual = sta->last_rx_signal < 27 ? - 0 : (sta->last_rx_signal - 27) * 92 / 127; + qual[count].qual = (sta->last_rx_signal - 156) == 0 ? 
+ 0 : (sta->last_rx_signal - 156) * 92 / 64; else - qual[count].qual = sta->last_rx_signal - - sta->last_rx_silence - 35; - qual[count].level = HFA384X_LEVEL_TO_dBm(sta->last_rx_signal); - qual[count].noise = HFA384X_LEVEL_TO_dBm(sta->last_rx_silence); + qual[count].qual = (sta->last_rx_signal - + sta->last_rx_silence) * 92 / 64; + qual[count].level = sta->last_rx_signal; + qual[count].noise = sta->last_rx_silence; qual[count].updated = sta->last_rx_updated; sta->last_rx_updated = IW_QUAL_DBM; @@ -2410,13 +2410,13 @@ int prism2_ap_translate_scan(struct net_device *dev, memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVQUAL; if (sta->last_rx_silence == 0) - iwe.u.qual.qual = sta->last_rx_signal < 27 ? - 0 : (sta->last_rx_signal - 27) * 92 / 127; + iwe.u.qual.qual = (sta->last_rx_signal -156) == 0 ? + 0 : (sta->last_rx_signal - 156) * 92 / 64; else - iwe.u.qual.qual = sta->last_rx_signal - - sta->last_rx_silence - 35; - iwe.u.qual.level = HFA384X_LEVEL_TO_dBm(sta->last_rx_signal); - iwe.u.qual.noise = HFA384X_LEVEL_TO_dBm(sta->last_rx_silence); + iwe.u.qual.qual = (sta->last_rx_signal - + sta->last_rx_silence) * 92 / 64; + iwe.u.qual.level = sta->last_rx_signal; + iwe.u.qual.noise = sta->last_rx_silence; iwe.u.qual.updated = sta->last_rx_updated; iwe.len = IW_EV_QUAL_LEN; current_ev = iwe_stream_add_event(info, current_ev, end_buf, diff --git a/drivers/net/wireless/hostap/hostap_config.h b/drivers/net/wireless/hostap/hostap_config.h index 2c8f71f..d37d1fd 100644 --- a/drivers/net/wireless/hostap/hostap_config.h +++ b/drivers/net/wireless/hostap/hostap_config.h @@ -45,4 +45,9 @@ */ /* #define PRISM2_NO_STATION_MODES */ +/* Enable TX power Setting functions + * (min att = -128 , max att = 127) + */ +#define RAW_TXPOWER_SETTING + #endif /* HOSTAP_CONFIG_H */ diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c index a8bddd8..1e13d2a 100644 --- a/drivers/net/wireless/hostap/hostap_hw.c +++ b/drivers/net/wireless/hostap/hostap_hw.c @@ 
-932,6 +932,7 @@ static int hfa384x_set_rid(struct net_device *dev, u16 rid, void *buf, int len) prism2_hw_reset(dev); } + hostap_restore_power(dev); return res; } diff --git a/drivers/net/wireless/hostap/hostap_info.c b/drivers/net/wireless/hostap/hostap_info.c index d737091..1cb9a79 100644 --- a/drivers/net/wireless/hostap/hostap_info.c +++ b/drivers/net/wireless/hostap/hostap_info.c @@ -433,6 +433,11 @@ static void handle_info_queue_linkstatus(local_info_t *local) } /* Get BSSID if we have a valid AP address */ + + if ( val == HFA384X_LINKSTATUS_CONNECTED || + val == HFA384X_LINKSTATUS_DISCONNECTED ) + hostap_restore_power(local->dev); + if (connected) { netif_carrier_on(local->dev); netif_carrier_on(local->ddev); diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c index 12de464..2399f6c 100644 --- a/drivers/net/wireless/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/hostap/hostap_ioctl.c @@ -1477,23 +1477,20 @@ static int prism2_txpower_hfa386x_to_dBm(u16 val) val = 255; tmp = val; - tmp >>= 2; - return -12 - tmp; + return tmp; } static u16 prism2_txpower_dBm_to_hfa386x(int val) { signed char tmp; - if (val > 20) - return 128; - else if (val < -43) + if (val > 127) return 127; + else if (val < -128) + return 128; tmp = val; - tmp = -12 - tmp; - tmp <<= 2; return (unsigned char) tmp; } @@ -4056,3 +4053,35 @@ int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return ret; } + +/* BUG FIX: Restore power setting value when lost due to F/W bug */ + +int hostap_restore_power(struct net_device *dev) +{ + struct hostap_interface *iface = netdev_priv(dev); + local_info_t *local = iface->local; + + u16 val; + int ret = 0; + + if (local->txpower_type == PRISM2_TXPOWER_OFF) { + val = 0xff; /* use all standby and sleep modes */ + ret = local->func->cmd(dev, HFA384X_CMDCODE_WRITEMIF, + HFA386X_CR_A_D_TEST_MODES2, + &val, NULL); + } + +#ifdef RAW_TXPOWER_SETTING + if (local->txpower_type == 
PRISM2_TXPOWER_FIXED) { + val = HFA384X_TEST_CFG_BIT_ALC; + local->func->cmd(dev, HFA384X_CMDCODE_TEST | + (HFA384X_TEST_CFG_BITS << 8), 0, &val, NULL); + val = prism2_txpower_dBm_to_hfa386x(local->txpower); + ret = (local->func->cmd(dev, HFA384X_CMDCODE_WRITEMIF, + HFA386X_CR_MANUAL_TX_POWER, &val, NULL)); + } +#endif /* RAW_TXPOWER_SETTING */ + return (ret ? -EOPNOTSUPP : 0); +} + +EXPORT_SYMBOL(hostap_restore_power); diff --git a/include/linux/ath5k_platform.h b/include/linux/ath5k_platform.h new file mode 100644 index 0000000..ec85224 --- /dev/null +++ b/include/linux/ath5k_platform.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2008 Atheros Communications Inc. + * Copyright (c) 2009 Gabor Juhos + * Copyright (c) 2009 Imre Kaloz + * Copyright (c) 2010 Daniel Golle + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _LINUX_ATH5K_PLATFORM_H +#define _LINUX_ATH5K_PLATFORM_H + +#define ATH5K_PLAT_EEP_MAX_WORDS 2048 + +struct ath5k_platform_data { + u16 *eeprom_data; + u8 *macaddr; +}; + +#endif /* _LINUX_ATH5K_PLATFORM_H */ diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index 16f9742..6e3f54f 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -1,11 +1,19 @@ /* - * ath9k platform data defines + * Copyright (c) 2008 Atheros Communications Inc. + * Copyright (c) 2009 Gabor Juhos + * Copyright (c) 2009 Imre Kaloz * - * Copyright (C) 2008 Gabor Juhos + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #ifndef _LINUX_ATH9K_PLATFORM_H @@ -16,13 +24,14 @@ struct ath9k_platform_data { u16 eeprom_data[ATH9K_PLAT_EEP_MAX_WORDS]; u8 *macaddr; - unsigned long quirk_wndr3700:1; int led_pin; u32 gpio_mask; u32 gpio_val; bool is_clk_25mhz; + int (*get_mac_revision)(void); + int (*external_reset)(void); }; #endif /* _LINUX_ATH9K_PLATFORM_H */ diff --git a/include/linux/atm.h b/include/linux/atm.h index d3b2921..2c63d27 100644 --- a/include/linux/atm.h +++ b/include/linux/atm.h @@ -139,6 +139,9 @@ struct atm_trafprm { int min_pcr; /* minimum PCR in cells per second */ int max_cdv; /* maximum CDV in microseconds */ int max_sdu; /* maximum SDU in bytes */ + int scr; /* sustained rate in cells per second */ + int mbs; /* maximum burst size (MBS) in cells */ + int cdv; /* Cell delay variation */ /* extra params for ABR */ unsigned int icr; /* Initial Cell Rate (24-bit) */ unsigned int tbe; /* Transient Buffer Exposure (24-bit) */ diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 7b31863..79e8b9f 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -29,6 +29,8 @@ struct sockaddr_ll { /* These ones are invisible by user level */ #define PACKET_LOOPBACK 5 /* MC/BRD frame looped back */ #define PACKET_FASTROUTE 6 /* Fastrouted frame */ +#define PACKET_MASK_ANY 0xffffffff /* mask for packet type bits */ + /* Packet socket options */ @@ -49,6 +51,7 @@ struct sockaddr_ll { #define PACKET_VNET_HDR 15 #define PACKET_TX_TIMESTAMP 16 #define PACKET_TIMESTAMP 17 +#define PACKET_RECV_TYPE 18 struct tpacket_stats { unsigned int tp_packets; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0decca2..bed87f5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -148,7 +148,7 @@ static inline bool dev_xmit_complete(int rc) */ #if defined(CONFIG_WLAN) || defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) -# if defined(CONFIG_MAC80211_MESH) +# if 1 || defined(CONFIG_MAC80211_MESH) # define LL_MAX_HEADER 128 #
else # define LL_MAX_HEADER 96 @@ -1188,6 +1188,7 @@ struct net_device { void *ax25_ptr; /* AX.25 specific data */ struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data, assign before registering */ + void *phy_ptr; /* PHY device specific data */ /* * Cache lines mostly used on receive path (including eth_type_trans()) diff --git a/include/linux/netfilter/nf_conntrack_rtsp.h b/include/linux/netfilter/nf_conntrack_rtsp.h new file mode 100644 index 0000000..0c9e3b8 --- /dev/null +++ b/include/linux/netfilter/nf_conntrack_rtsp.h @@ -0,0 +1,63 @@ +/* + * RTSP extension for IP connection tracking. + * (C) 2003 by Tom Marshall + * based on ip_conntrack_irc.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _IP_CONNTRACK_RTSP_H +#define _IP_CONNTRACK_RTSP_H + +//#define IP_NF_RTSP_DEBUG 1 +#define IP_NF_RTSP_VERSION "0.6.21" + +#ifdef __KERNEL__ +/* port block types */ +typedef enum { + pb_single, /* client_port=x */ + pb_range, /* client_port=x-y */ + pb_discon /* client_port=x/y (rtspbis) */ +} portblock_t; + +/* We record seq number and length of rtsp headers here, all in host order. */ + +/* + * This structure is per expected connection. It is a member of struct + * ip_conntrack_expect. The TCP SEQ for the conntrack expect is stored + * there and we are expected to only store the length of the data which + * needs replaced. If a packet contains multiple RTSP messages, we create + * one expected connection per message. + * + * We use these variables to mark the entire header block. This may seem + * like overkill, but the nature of RTSP requires it. A header may appear + * multiple times in a message. We must treat two Transport headers the + * same as one Transport header with two entries. 
+ */ +struct ip_ct_rtsp_expect +{ + u_int32_t len; /* length of header block */ + portblock_t pbtype; /* Type of port block that was requested */ + u_int16_t loport; /* Port that was requested, low or first */ + u_int16_t hiport; /* Port that was requested, high or second */ +#if 0 + uint method; /* RTSP method */ + uint cseq; /* CSeq from request */ +#endif +}; + +extern unsigned int (*nf_nat_rtsp_hook)(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + unsigned int matchoff, unsigned int matchlen, + struct ip_ct_rtsp_expect *prtspexp, + struct nf_conntrack_expect *exp); + +extern void (*nf_nat_rtsp_hook_expectfn)(struct nf_conn *ct, struct nf_conntrack_expect *exp); + +#define RTSP_PORT 554 + +#endif /* __KERNEL__ */ + +#endif /* _IP_CONNTRACK_RTSP_H */ diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 0ce91d5..feda699 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -2,12 +2,15 @@ #define __NF_CONNTRACK_SIP_H__ #ifdef __KERNEL__ +#include + #define SIP_PORT 5060 #define SIP_TIMEOUT 3600 struct nf_ct_sip_master { unsigned int register_cseq; unsigned int invite_cseq; + __be16 forced_dport; }; enum sip_expectation_classes { diff --git a/include/linux/netfilter/xt_layer7.h b/include/linux/netfilter/xt_layer7.h new file mode 100644 index 0000000..c38d3c4 --- /dev/null +++ b/include/linux/netfilter/xt_layer7.h @@ -0,0 +1,14 @@ +#ifndef _XT_LAYER7_H +#define _XT_LAYER7_H + +#define MAX_PATTERN_LEN 8192 +#define MAX_PROTOCOL_LEN 256 + +struct xt_layer7_info { + char protocol[MAX_PROTOCOL_LEN]; + char pattern[MAX_PATTERN_LEN]; + u_int8_t invert; + u_int8_t pkt; +}; + +#endif /* _XT_LAYER7_H */ diff --git a/include/linux/netfilter_helpers.h b/include/linux/netfilter_helpers.h new file mode 100644 index 0000000..903f374 --- /dev/null +++ b/include/linux/netfilter_helpers.h @@ -0,0 +1,133 @@ +/* + * Helpers for netfilter modules.
This file provides implementations for basic + * functions such as strncasecmp(), etc. + * + * gcc will warn for defined but unused functions, so we only include the + * functions requested. The following macros are used: + * NF_NEED_STRNCASECMP nf_strncasecmp() + * NF_NEED_STRTOU16 nf_strtou16() + * NF_NEED_STRTOU32 nf_strtou32() + */ +#ifndef _NETFILTER_HELPERS_H +#define _NETFILTER_HELPERS_H + +/* Only include these functions for kernel code. */ +#ifdef __KERNEL__ + +#include +#define iseol(c) ( (c) == '\r' || (c) == '\n' ) + +/* + * The standard strncasecmp() + */ +#ifdef NF_NEED_STRNCASECMP +static int +nf_strncasecmp(const char* s1, const char* s2, u_int32_t len) +{ + if (s1 == NULL || s2 == NULL) + { + if (s1 == NULL && s2 == NULL) + { + return 0; + } + return (s1 == NULL) ? -1 : 1; + } + while (len > 0 && tolower(*s1) == tolower(*s2)) + { + len--; + s1++; + s2++; + } + return ( (len == 0) ? 0 : (tolower(*s1) - tolower(*s2)) ); +} +#endif /* NF_NEED_STRNCASECMP */ + +/* + * Parse a string containing a 16-bit unsigned integer. + * Returns the number of chars used, or zero if no number is found. + */ +#ifdef NF_NEED_STRTOU16 +static int +nf_strtou16(const char* pbuf, u_int16_t* pval) +{ + int n = 0; + + *pval = 0; + while (isdigit(pbuf[n])) + { + *pval = (*pval * 10) + (pbuf[n] - '0'); + n++; + } + + return n; +} +#endif /* NF_NEED_STRTOU16 */ + +/* + * Parse a string containing a 32-bit unsigned integer. + * Returns the number of chars used, or zero if no number is found. + */ +#ifdef NF_NEED_STRTOU32 +static int +nf_strtou32(const char* pbuf, u_int32_t* pval) +{ + int n = 0; + + *pval = 0; + while (pbuf[n] >= '0' && pbuf[n] <= '9') + { + *pval = (*pval * 10) + (pbuf[n] - '0'); + n++; + } + + return n; +} +#endif /* NF_NEED_STRTOU32 */ + +/* + * Given a buffer and length, advance to the next line and mark the current + * line. 
+ */ +#ifdef NF_NEED_NEXTLINE +static int +nf_nextline(char* p, uint len, uint* poff, uint* plineoff, uint* plinelen) +{ + uint off = *poff; + uint physlen = 0; + + if (off >= len) + { + return 0; + } + + while (p[off] != '\n') + { + if (len-off <= 1) + { + return 0; + } + + physlen++; + off++; + } + + /* if we saw a crlf, physlen needs adjusted */ + if (physlen > 0 && p[off] == '\n' && p[off-1] == '\r') + { + physlen--; + } + + /* advance past the newline */ + off++; + + *plineoff = *poff; + *plinelen = physlen; + *poff = off; + + return 1; +} +#endif /* NF_NEED_NEXTLINE */ + +#endif /* __KERNEL__ */ + +#endif /* _NETFILTER_HELPERS_H */ diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 64a5d95..d8ed243 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -93,6 +93,7 @@ struct ipt_ip { #define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ #define IPT_F_GOTO 0x02 /* Set if jump is a goto */ #define IPT_F_MASK 0x03 /* All possible flag bits mask. */ +#define IPT_F_NO_DEF_MATCH 0x80 /* Internal: no default match rules present */ /* Values for "inv" field in struct ipt_ip. */ #define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ diff --git a/include/linux/netfilter_mime.h b/include/linux/netfilter_mime.h new file mode 100644 index 0000000..7eeb183 --- /dev/null +++ b/include/linux/netfilter_mime.h @@ -0,0 +1,89 @@ +/* + * MIME functions for netfilter modules. This file provides implementations + * for basic MIME parsing. MIME headers are used in many protocols, such as + * HTTP, RTSP, SIP, etc. + * + * gcc will warn for defined but unused functions, so we only include the + * functions requested. The following macros are used: + * NF_NEED_MIME_NEXTLINE nf_mime_nextline() + */ +#ifndef _NETFILTER_MIME_H +#define _NETFILTER_MIME_H + +/* Only include these functions for kernel code. 
*/ +#ifdef __KERNEL__ + +#include + +/* + * Given a buffer and length, advance to the next line and mark the current + * line. If the current line is empty, *plinelen will be set to zero. If + * not, it will be set to the actual line length (including CRLF). + * + * 'line' in this context means logical line (includes LWS continuations). + * Returns 1 on success, 0 on failure. + */ +#ifdef NF_NEED_MIME_NEXTLINE +static int +nf_mime_nextline(char* p, uint len, uint* poff, uint* plineoff, uint* plinelen) +{ + uint off = *poff; + uint physlen = 0; + int is_first_line = 1; + + if (off >= len) + { + return 0; + } + + do + { + while (p[off] != '\n') + { + if (len-off <= 1) + { + return 0; + } + + physlen++; + off++; + } + + /* if we saw a crlf, physlen needs adjusted */ + if (physlen > 0 && p[off] == '\n' && p[off-1] == '\r') + { + physlen--; + } + + /* advance past the newline */ + off++; + + /* check for an empty line */ + if (physlen == 0) + { + break; + } + + /* check for colon on the first physical line */ + if (is_first_line) + { + is_first_line = 0; + if (memchr(p+(*poff), ':', physlen) == NULL) + { + return 0; + } + } + } + while (p[off] == ' ' || p[off] == '\t'); + + *plineoff = *poff; + *plinelen = (physlen == 0) ? 0 : (off - *poff); + *poff = off; + + return 1; +} +#endif /* NF_NEED_MIME_NEXTLINE */ + +#endif /* __KERNEL__ */ + +#endif /* _NETFILTER_MIME_H */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 4d3f63a..2580e1b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -332,6 +332,20 @@ struct phy_device { void (*adjust_link)(struct net_device *dev); void (*adjust_state)(struct net_device *dev); + + /* + * By default these point to the original functions + * with the same name. 
adding them to the phy_device + * allows the phy driver to override them for packet + * mangling if the ethernet driver supports it + * This is required to support some really horrible + * switches such as the Marvell 88E6060 + */ + int (*netif_receive_skb)(struct sk_buff *skb); + int (*netif_rx)(struct sk_buff *skb); + + /* alignment offset for packets */ + int pkt_align; }; #define to_phy_device(d) container_of(d, struct phy_device, dev) @@ -386,9 +400,18 @@ struct phy_driver { */ int (*config_aneg)(struct phy_device *phydev); + /* Determine if autonegotiation is done */ + int (*aneg_done)(struct phy_device *phydev); + /* Determines the negotiated speed and duplex */ int (*read_status)(struct phy_device *phydev); + /* + * Update the value in phydev->link to reflect the + * current link value + */ + int (*update_link)(struct phy_device *phydev); + /* Clears any pending interrupts */ int (*ack_interrupt)(struct phy_device *phydev); @@ -508,6 +531,7 @@ void phy_start_machine(struct phy_device *phydev, void phy_stop_machine(struct phy_device *phydev); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); +int phy_ethtool_ioctl(struct phy_device *phydev, void *useraddr); int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_start_interrupts(struct phy_device *phydev); diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index c533670..9c5ed47 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -173,8 +173,37 @@ struct tc_sfq_xstats { * * The only reason for this is efficiency, it is possible * to change these parameters in compile time. + * + * If you need to play with these values, use esfq instead. 
*/ +/* ESFQ section */ + +enum +{ + /* traditional */ + TCA_SFQ_HASH_CLASSIC, + TCA_SFQ_HASH_DST, + TCA_SFQ_HASH_SRC, + TCA_SFQ_HASH_FWMARK, + /* conntrack */ + TCA_SFQ_HASH_CTORIGDST, + TCA_SFQ_HASH_CTORIGSRC, + TCA_SFQ_HASH_CTREPLDST, + TCA_SFQ_HASH_CTREPLSRC, + TCA_SFQ_HASH_CTNATCHG, +}; + +struct tc_esfq_qopt +{ + unsigned quantum; /* Bytes per round allocated to flow */ + int perturb_period; /* Period of hash perturbation */ + __u32 limit; /* Maximal packets in queue */ + unsigned divisor; /* Hash divisor */ + unsigned flows; /* Maximal number of flows */ + unsigned hash_kind; /* Hash function to use for flow identification */ +}; + /* RED section */ enum { diff --git a/include/linux/switch.h b/include/linux/switch.h index ef2dc3a..6eefb7f 100644 --- a/include/linux/switch.h +++ b/include/linux/switch.h @@ -36,8 +36,9 @@ enum { SWITCH_ATTR_TYPE, /* device */ SWITCH_ATTR_ID, - SWITCH_ATTR_NAME, SWITCH_ATTR_DEV_NAME, + SWITCH_ATTR_ALIAS, + SWITCH_ATTR_NAME, SWITCH_ATTR_VLANS, SWITCH_ATTR_PORTS, SWITCH_ATTR_CPU_PORT, @@ -146,10 +147,12 @@ struct switch_dev_ops { struct switch_dev { const struct switch_dev_ops *ops; - const char *name; + /* will be automatically filled */ + char devname[IFNAMSIZ]; - /* NB: either devname or netdev must be set */ - const char *devname; + const char *name; + /* NB: either alias or netdev must be set */ + const char *alias; struct net_device *netdev; int ports; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 582e4ae..b570125 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -91,6 +91,12 @@ extern void addrconf_join_solict(struct net_device *dev, extern void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); +extern int (*ipv6_dev_get_saddr_hook)(struct net *net, + struct net_device *dev, + const struct in6_addr *daddr, + unsigned int srcprefs, + struct in6_addr *saddr); + static inline unsigned long addrconf_timeout_fixup(u32 timeout, unsigned unit) { diff --git 
a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5d4f8e5..3262efe 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -134,6 +134,22 @@ struct nf_conn { struct net *ct_net; #endif +#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || \ + defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE) + struct { + /* + * e.g. "http". NULL before decision. "unknown" after decision + * if no match. + */ + char *app_proto; + /* + * application layer data so far. NULL after match decision. + */ + char *app_data; + unsigned int app_data_len; + } layer7; +#endif + /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; }; diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 6dee7bf..9190ae4 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -6,7 +6,6 @@ config BRIDGE tristate "802.1d Ethernet Bridging" select LLC select STP - depends on IPV6 || IPV6=n ---help--- If you say Y here, then your Linux box will be able to act as an Ethernet bridge, which means that the different Ethernet segments it diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index ee64287..1dfa13e 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -109,7 +109,7 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) /* called with rcu_read_lock */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) { - if (should_deliver(to, skb)) { + if (should_deliver(to, skb) && !(to->flags & BR_ISOLATE_MODE)) { if (skb0) deliver_clone(to, skb, __br_forward); else @@ -164,7 +164,8 @@ out: static void br_flood(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb0, void (*__packet_hook)(const struct net_bridge_port *p, - struct sk_buff *skb)) + struct sk_buff *skb), + bool forward) { struct net_bridge_port *p; struct net_bridge_port *prev; @@ -172,6 +173,9 @@ static void br_flood(struct net_bridge *br, 
struct sk_buff *skb, prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) { + if (forward && (p->flags & BR_ISOLATE_MODE)) + continue; + prev = maybe_deliver(prev, p, skb, __packet_hook); if (IS_ERR(prev)) goto out; @@ -195,14 +199,14 @@ out: /* called with rcu_read_lock */ void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb) { - br_flood(br, skb, NULL, __br_deliver); + br_flood(br, skb, NULL, __br_deliver, false); } /* called under bridge lock */ void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb2) { - br_flood(br, skb, skb2, __br_forward); + br_flood(br, skb, skb2, __br_forward, true); } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index f06ee39..eb0795c 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -64,7 +64,7 @@ int br_handle_frame_finish(struct sk_buff *skb) br_multicast_rcv(br, p, skb)) goto drop; - if (p->state == BR_STATE_LEARNING) + if ((p->state == BR_STATE_LEARNING) && skb->protocol != htons(ETH_P_PAE)) goto drop; BR_INPUT_SKB_CB(skb)->brdev = br->dev; @@ -77,7 +77,11 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; - if (is_broadcast_ether_addr(dest)) + if (skb->protocol == htons(ETH_P_PAE)) { + skb2 = skb; + /* Do not forward 802.1x/EAP frames */ + skb = NULL; + } else if (is_broadcast_ether_addr(dest)) skb2 = skb; else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb); @@ -93,7 +97,8 @@ int br_handle_frame_finish(struct sk_buff *skb) skb2 = skb; br->dev->stats.multicast++; - } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { + } else if ((p->flags & BR_ISOLATE_MODE) || + ((dst = __br_fdb_get(br, dest)) && dst->is_local)) { skb2 = skb; /* Do not forward the packet since it's local. 
*/ skb = NULL; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 857a021..a2b2c76 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -134,6 +134,7 @@ struct net_bridge_port unsigned long flags; #define BR_HAIRPIN_MODE 0x00000001 +#define BR_ISOLATE_MODE 0x00000002 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 6229b62..cde52fe 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -149,6 +149,22 @@ static int store_hairpin_mode(struct net_bridge_port *p, unsigned long v) static BRPORT_ATTR(hairpin_mode, S_IRUGO | S_IWUSR, show_hairpin_mode, store_hairpin_mode); +static ssize_t show_isolate_mode(struct net_bridge_port *p, char *buf) +{ + int isolate_mode = (p->flags & BR_ISOLATE_MODE) ? 1 : 0; + return sprintf(buf, "%d\n", isolate_mode); +} +static ssize_t store_isolate_mode(struct net_bridge_port *p, unsigned long v) +{ + if (v) + p->flags |= BR_ISOLATE_MODE; + else + p->flags &= ~BR_ISOLATE_MODE; + return 0; +} +static BRPORT_ATTR(isolate_mode, S_IRUGO | S_IWUSR, + show_isolate_mode, store_isolate_mode); + #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) { @@ -181,6 +197,7 @@ static struct brport_attribute *brport_attrs[] = { &brport_attr_hold_timer, &brport_attr_flush, &brport_attr_hairpin_mode, + &brport_attr_isolate_mode, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, #endif diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 1dfc18a..8e584c2 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -248,6 +248,11 @@ config NF_NAT_IRC depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_IRC +config NF_NAT_RTSP + tristate + depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_RTSP + config NF_NAT_TFTP tristate depends on NF_CONNTRACK && NF_NAT diff 
--git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index dca2082..5cf62f4 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o +obj-$(CONFIG_NF_NAT_RTSP) += nf_nat_rtsp.o obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 24e556e..70a21fe 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -81,9 +81,14 @@ ip_packet_match(const struct iphdr *ip, #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) - if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, + if (ipinfo->flags & IPT_F_NO_DEF_MATCH) + return true; + + if (FWINV(ipinfo->smsk.s_addr && + (ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, IPT_INV_SRCIP) || - FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, + FWINV(ipinfo->dmsk.s_addr && + (ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, IPT_INV_DSTIP)) { dprintf("Source or dest mismatch.\n"); @@ -134,6 +139,29 @@ ip_packet_match(const struct iphdr *ip, return true; } +static void +ip_checkdefault(struct ipt_ip *ip) +{ + static const char iface_mask[IFNAMSIZ] = {}; + + if (ip->invflags || ip->flags & IPT_F_FRAG) + return; + + if (memcmp(ip->iniface_mask, iface_mask, IFNAMSIZ) != 0) + return; + + if (memcmp(ip->outiface_mask, iface_mask, IFNAMSIZ) != 0) + return; + + if (ip->smsk.s_addr || ip->dmsk.s_addr) + return; + + if (ip->proto) + return; + + ip->flags |= IPT_F_NO_DEF_MATCH; +} + static bool ip_checkentry(const struct ipt_ip *ip) { @@ -284,6 +312,33 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) return (void *)entry + entry->next_offset; } +static bool 
+ipt_handle_default_rule(struct ipt_entry *e, unsigned int *verdict) +{ + struct xt_entry_target *t; + struct xt_standard_target *st; + + if (e->target_offset != sizeof(struct ipt_entry)) + return false; + + if (!(e->ip.flags & IPT_F_NO_DEF_MATCH)) + return false; + + t = ipt_get_target(e); + if (t->u.kernel.target->target) + return false; + + st = (struct xt_standard_target *) t; + if (st->verdict == XT_RETURN) + return false; + + if (st->verdict >= 0) + return false; + + *verdict = (unsigned)(-st->verdict) - 1; + return true; +} + /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ipt_do_table(struct sk_buff *skb, @@ -308,6 +363,25 @@ ipt_do_table(struct sk_buff *skb, ip = ip_hdr(skb); indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; + + IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + local_bh_disable(); + addend = xt_write_recseq_begin(); + private = table->private; + cpu = smp_processor_id(); + table_base = private->entries[cpu]; + jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); + origptr = *stackptr; + + e = get_entry(table_base, private->hook_entry[hook]); + if (ipt_handle_default_rule(e, &verdict)) { + ADD_COUNTER(e->counters, skb->len, 1); + xt_write_recseq_end(addend); + local_bh_enable(); + return verdict; + } + /* We handle fragments by dealing with the first fragment as * if it was a normal packet. 
All other fragments are treated * normally, except that they will NEVER match rules that ask @@ -322,18 +396,6 @@ ipt_do_table(struct sk_buff *skb, acpar.family = NFPROTO_IPV4; acpar.hooknum = hook; - IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - local_bh_disable(); - addend = xt_write_recseq_begin(); - private = table->private; - cpu = smp_processor_id(); - table_base = private->entries[cpu]; - jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; - stackptr = per_cpu_ptr(private->stackptr, cpu); - origptr = *stackptr; - - e = get_entry(table_base, private->hook_entry[hook]); - pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n", table->name, hook, origptr, get_entry(table_base, private->underflow[hook])); @@ -561,7 +623,7 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ipt_entry *e, const char *name) +check_entry(struct ipt_entry *e, const char *name) { const struct xt_entry_target *t; @@ -570,6 +632,8 @@ check_entry(const struct ipt_entry *e, const char *name) return -EINVAL; } + ip_checkdefault(&e->ip); + if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) return -EINVAL; @@ -931,6 +995,7 @@ copy_entries_to_user(unsigned int total_size, const struct xt_table_info *private = table->private; int ret = 0; const void *loc_cpu_entry; + u8 flags; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -962,6 +1027,14 @@ copy_entries_to_user(unsigned int total_size, goto free_counters; } + flags = e->ip.flags & IPT_F_MASK; + if (copy_to_user(userptr + off + + offsetof(struct ipt_entry, ip.flags), + &flags, sizeof(flags)) != 0) { + ret = -EFAULT; + goto free_counters; + } + for (i = sizeof(struct ipt_entry); i < e->target_offset; i += m->u.match_size) { diff --git a/net/ipv4/netfilter/nf_nat_rtsp.c b/net/ipv4/netfilter/nf_nat_rtsp.c new file mode 100644 index 0000000..4af5154 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_rtsp.c @@ -0,0 +1,496 @@ +/* + * RTSP extension for TCP 
NAT alteration + * (C) 2003 by Tom Marshall + * based on ip_nat_irc.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Module load syntax: + * insmod nf_nat_rtsp.o ports=port1,port2,...port + * stunaddr=
+ * destaction=[auto|strip|none] + * + * If no ports are specified, the default will be port 554 only. + * + * stunaddr specifies the address used to detect that a client is using STUN. + * If this address is seen in the destination parameter, it is assumed that + * the client has already punched a UDP hole in the firewall, so we don't + * mangle the client_port. If none is specified, it is autodetected. It + * only needs to be set if you have multiple levels of NAT. It should be + * set to the external address that the STUN clients detect. Note that in + * this case, it will not be possible for clients to use UDP with servers + * between the NATs. + * + * If no destaction is specified, auto is used. + * destaction=auto: strip destination parameter if it is not stunaddr. + * destaction=strip: always strip destination parameter (not recommended). + * destaction=none: do not touch destination parameter (not recommended). + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#define NF_NEED_STRNCASECMP +#define NF_NEED_STRTOU16 +#include +#define NF_NEED_MIME_NEXTLINE +#include + +#define INFOP(fmt, args...) printk(KERN_INFO "%s: %s: " fmt, __FILE__, __FUNCTION__ , ## args) +#if 0 +#define DEBUGP(fmt, args...) printk(KERN_DEBUG "%s: %s: " fmt, __FILE__, __FUNCTION__ , ## args) +#else +#define DEBUGP(fmt, args...) 
+#endif + +#define MAX_PORTS 8 +#define DSTACT_AUTO 0 +#define DSTACT_STRIP 1 +#define DSTACT_NONE 2 + +static char* stunaddr = NULL; +static char* destaction = NULL; + +static u_int32_t extip = 0; +static int dstact = 0; + +MODULE_AUTHOR("Tom Marshall "); +MODULE_DESCRIPTION("RTSP network address translation module"); +MODULE_LICENSE("GPL"); +module_param(stunaddr, charp, 0644); +MODULE_PARM_DESC(stunaddr, "Address for detecting STUN"); +module_param(destaction, charp, 0644); +MODULE_PARM_DESC(destaction, "Action for destination parameter (auto/strip/none)"); + +#define SKIP_WSPACE(ptr,len,off) while(off < len && isspace(*(ptr+off))) { off++; } + +/*** helper functions ***/ + +static void +get_skb_tcpdata(struct sk_buff* skb, char** pptcpdata, uint* ptcpdatalen) +{ + struct iphdr* iph = ip_hdr(skb); + struct tcphdr* tcph = (void *)iph + ip_hdrlen(skb); + + *pptcpdata = (char*)tcph + tcph->doff*4; + *ptcpdatalen = ((char*)skb_transport_header(skb) + skb->len) - *pptcpdata; +} + +/*** nat functions ***/ + +/* + * Mangle the "Transport:" header: + * - Replace all occurences of "client_port=" + * - Handle destination parameter + * + * In: + * ct, ctinfo = conntrack context + * skb = packet + * tranoff = Transport header offset from TCP data + * tranlen = Transport header length (incl. CRLF) + * rport_lo = replacement low port (host endian) + * rport_hi = replacement high port (host endian) + * + * Returns packet size difference. 
+ * + * Assumes that a complete transport header is present, ending with CR or LF + */ +static int +rtsp_mangle_tran(enum ip_conntrack_info ctinfo, + struct nf_conntrack_expect* exp, + struct ip_ct_rtsp_expect* prtspexp, + struct sk_buff* skb, uint tranoff, uint tranlen) +{ + char* ptcp; + uint tcplen; + char* ptran; + char rbuf1[16]; /* Replacement buffer (one port) */ + uint rbuf1len; /* Replacement len (one port) */ + char rbufa[16]; /* Replacement buffer (all ports) */ + uint rbufalen; /* Replacement len (all ports) */ + u_int32_t newip; + u_int16_t loport, hiport; + uint off = 0; + uint diff; /* Number of bytes we removed */ + + struct nf_conn *ct = exp->master; + struct nf_conntrack_tuple *t; + + char szextaddr[15+1]; + uint extaddrlen; + int is_stun; + + get_skb_tcpdata(skb, &ptcp, &tcplen); + ptran = ptcp+tranoff; + + if (tranoff+tranlen > tcplen || tcplen-tranoff < tranlen || + tranlen < 10 || !iseol(ptran[tranlen-1]) || + nf_strncasecmp(ptran, "Transport:", 10) != 0) + { + INFOP("sanity check failed\n"); + return 0; + } + off += 10; + SKIP_WSPACE(ptcp+tranoff, tranlen, off); + + newip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip; + t = &exp->tuple; + t->dst.u3.ip = newip; + + extaddrlen = extip ? sprintf(szextaddr, "%pI4", &extip) + : sprintf(szextaddr, "%pI4", &newip); /* %pI4 takes a pointer to the address and prints it as a dotted quad */ + DEBUGP("stunaddr=%s (%s)\n", szextaddr, (extip?"forced":"auto")); + + rbuf1len = rbufalen = 0; + switch (prtspexp->pbtype) + { + case pb_single: + for (loport = prtspexp->loport; loport != 0; loport++) /* XXX: improper wrap? */ + { + t->dst.u.udp.port = htons(loport); + if (nf_ct_expect_related(exp) == 0) + { + DEBUGP("using port %hu\n", loport); + break; + } + } + if (loport != 0) + { + rbuf1len = sprintf(rbuf1, "%hu", loport); + rbufalen = sprintf(rbufa, "%hu", loport); + } + break; + case pb_range: + for (loport = prtspexp->loport; loport != 0; loport += 2) /* XXX: improper wrap?
*/ + { + t->dst.u.udp.port = htons(loport); + if (nf_ct_expect_related(exp) == 0) + { + hiport = loport + ~exp->mask.src.u.udp.port; + DEBUGP("using ports %hu-%hu\n", loport, hiport); + break; + } + } + if (loport != 0) + { + rbuf1len = sprintf(rbuf1, "%hu", loport); + rbufalen = sprintf(rbufa, "%hu-%hu", loport, loport+1); + } + break; + case pb_discon: + for (loport = prtspexp->loport; loport != 0; loport++) /* XXX: improper wrap? */ + { + t->dst.u.udp.port = htons(loport); + if (nf_ct_expect_related(exp) == 0) + { + DEBUGP("using port %hu (1 of 2)\n", loport); + break; + } + } + for (hiport = prtspexp->hiport; hiport != 0; hiport++) /* XXX: improper wrap? */ + { + t->dst.u.udp.port = htons(hiport); + if (nf_ct_expect_related(exp) == 0) + { + DEBUGP("using port %hu (2 of 2)\n", hiport); + break; + } + } + if (loport != 0 && hiport != 0) + { + rbuf1len = sprintf(rbuf1, "%hu", loport); + if (hiport == loport+1) + { + rbufalen = sprintf(rbufa, "%hu-%hu", loport, hiport); + } + else + { + rbufalen = sprintf(rbufa, "%hu/%hu", loport, hiport); + } + } + break; + } + + if (rbuf1len == 0) + { + return 0; /* cannot get replacement port(s) */ + } + + /* Transport: tran;field;field=val,tran;field;field=val,... */ + while (off < tranlen) + { + uint saveoff; + const char* pparamend; + uint nextparamoff; + + pparamend = memchr(ptran+off, ',', tranlen-off); + pparamend = (pparamend == NULL) ? ptran+tranlen : pparamend+1; + nextparamoff = pparamend-ptran; /* relative to ptran, like off and nextfieldoff */ + + /* + * We pass over each param twice. On the first pass, we look for a + * destination= field. It is handled by the security policy. If it + * is present, allowed, and equal to our external address, we assume + * that STUN is being used and we leave the client_port= field alone. + */ + is_stun = 0; + saveoff = off; + while (off < nextparamoff) + { + const char* pfieldend; + uint nextfieldoff; + + pfieldend = memchr(ptran+off, ';', nextparamoff-off); + nextfieldoff = (pfieldend == NULL) ?
nextparamoff : pfieldend-ptran+1; + + if (dstact != DSTACT_NONE && strncmp(ptran+off, "destination=", 12) == 0) + { + if (strncmp(ptran+off+12, szextaddr, extaddrlen) == 0) + { + is_stun = 1; + } + if (dstact == DSTACT_STRIP || (dstact == DSTACT_AUTO && !is_stun)) + { + diff = nextfieldoff-off; + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + tranoff+off /* offset from start of tcp data */, diff, NULL, 0)) + { + /* mangle failed, all we can do is bail */ + nf_ct_unexpect_related(exp); + return 0; + } + get_skb_tcpdata(skb, &ptcp, &tcplen); + ptran = ptcp+tranoff; + tranlen -= diff; + nextparamoff -= diff; + nextfieldoff -= diff; + } + } + + off = nextfieldoff; + } + if (is_stun) + { + continue; + } + off = saveoff; + while (off < nextparamoff) + { + const char* pfieldend; + uint nextfieldoff; + + pfieldend = memchr(ptran+off, ';', nextparamoff-off); + nextfieldoff = (pfieldend == NULL) ? nextparamoff : pfieldend-ptran+1; + + if (strncmp(ptran+off, "client_port=", 12) == 0) + { + u_int16_t port; + uint numlen; + uint origoff; + uint origlen; + char* rbuf = rbuf1; + uint rbuflen = rbuf1len; + + off += 12; + origoff = (ptran-ptcp)+off; + origlen = 0; + numlen = nf_strtou16(ptran+off, &port); + off += numlen; + origlen += numlen; + if (port != prtspexp->loport) + { + DEBUGP("multiple ports found, port %hu ignored\n", port); + } + else + { + if (ptran[off] == '-' || ptran[off] == '/') + { + off++; + origlen++; + numlen = nf_strtou16(ptran+off, &port); + off += numlen; + origlen += numlen; + rbuf = rbufa; + rbuflen = rbufalen; + } + + /* + * note we cannot just memcpy() if the sizes are the same. + * the mangle function does skb resizing, checks for a + * cloned skb, and updates the checksums. + * + * parameter 4 below is offset from start of tcp data.
+ */ + diff = origlen-rbuflen; + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + origoff, origlen, rbuf, rbuflen)) + { + /* mangle failed, all we can do is bail */ + nf_ct_unexpect_related(exp); + return 0; + } + get_skb_tcpdata(skb, &ptcp, &tcplen); + ptran = ptcp+tranoff; + tranlen -= diff; + nextparamoff -= diff; + nextfieldoff -= diff; + } + } + + off = nextfieldoff; + } + + off = nextparamoff; + } + + return 1; +} + +static uint +help_out(struct sk_buff *skb, enum ip_conntrack_info ctinfo, + unsigned int matchoff, unsigned int matchlen, struct ip_ct_rtsp_expect* prtspexp, + struct nf_conntrack_expect* exp) +{ + char* ptcp; + uint tcplen; + uint hdrsoff; + uint hdrslen; + uint lineoff; + uint linelen; + uint off; + + //struct iphdr* iph = (struct iphdr*)skb->nh.iph; + //struct tcphdr* tcph = (struct tcphdr*)((void*)iph + iph->ihl*4); + + get_skb_tcpdata(skb, &ptcp, &tcplen); + hdrsoff = matchoff;//exp->seq - ntohl(tcph->seq); + hdrslen = matchlen; + off = hdrsoff; + DEBUGP("NAT rtsp help_out\n"); + + while (nf_mime_nextline(ptcp, hdrsoff+hdrslen, &off, &lineoff, &linelen)) + { + if (linelen == 0) + { + break; + } + if (off > hdrsoff+hdrslen) + { + INFOP("!! 
overrun !!"); + break; + } + DEBUGP("hdr: len=%u, %.*s", linelen, (int)linelen, ptcp+lineoff); + + if (nf_strncasecmp(ptcp+lineoff, "Transport:", 10) == 0) + { + uint oldtcplen = tcplen; + DEBUGP("hdr: Transport\n"); + if (!rtsp_mangle_tran(ctinfo, exp, prtspexp, skb, lineoff, linelen)) + { + DEBUGP("hdr: Transport mangle failed"); + break; + } + get_skb_tcpdata(skb, &ptcp, &tcplen); + hdrslen -= (oldtcplen-tcplen); + off -= (oldtcplen-tcplen); + lineoff -= (oldtcplen-tcplen); + linelen -= (oldtcplen-tcplen); + DEBUGP("rep: len=%u, %.*s", linelen, (int)linelen, ptcp+lineoff); + } + } + + return NF_ACCEPT; +} + +static unsigned int +help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, + unsigned int matchoff, unsigned int matchlen, struct ip_ct_rtsp_expect* prtspexp, + struct nf_conntrack_expect* exp) +{ + int dir = CTINFO2DIR(ctinfo); + int rc = NF_ACCEPT; + + switch (dir) + { + case IP_CT_DIR_ORIGINAL: + rc = help_out(skb, ctinfo, matchoff, matchlen, prtspexp, exp); + break; + case IP_CT_DIR_REPLY: + DEBUGP("unmangle ! %u\n", ctinfo); + /* XXX: unmangle */ + rc = NF_ACCEPT; + break; + } + //UNLOCK_BH(&ip_rtsp_lock); + + return rc; +} + +static void expected(struct nf_conn* ct, struct nf_conntrack_expect *exp) +{ + struct nf_nat_multi_range_compat mr; + u_int32_t newdstip, newsrcip, newip; + + struct nf_conn *master = ct->master; + + newdstip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; + newsrcip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; + //FIXME (how to port that ?) + //code from 2.4 : newip = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) ? newsrcip : newdstip; + newip = newdstip; + + DEBUGP("newsrcip=%u.%u.%u.%u, newdstip=%u.%u.%u.%u, newip=%u.%u.%u.%u\n", + &newsrcip, &newdstip, &newip); + + mr.rangesize = 1; + // We don't want to manip the per-protocol, just the IPs. 
+ mr.range[0].flags = IP_NAT_RANGE_MAP_IPS; + mr.range[0].min_ip = mr.range[0].max_ip = newip; + + nf_nat_setup_info(ct, &mr.range[0], IP_NAT_MANIP_DST); +} + + +static void __exit fini(void) +{ + nf_nat_rtsp_hook = NULL; + nf_nat_rtsp_hook_expectfn = NULL; + synchronize_net(); +} + +static int __init init(void) +{ + printk("nf_nat_rtsp v" IP_NF_RTSP_VERSION " loading\n"); + + BUG_ON(nf_nat_rtsp_hook); + nf_nat_rtsp_hook = help; + nf_nat_rtsp_hook_expectfn = &expected; + + if (stunaddr != NULL) + extip = in_aton(stunaddr); + + if (destaction != NULL) { + if (strcmp(destaction, "auto") == 0) + dstact = DSTACT_AUTO; + + if (strcmp(destaction, "strip") == 0) + dstact = DSTACT_STRIP; + + if (strcmp(destaction, "none") == 0) + dstact = DSTACT_NONE; + } + + return 0; +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index e40cf78..e5856b0 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -73,6 +73,7 @@ static int map_addr(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_conn_help *help = nfct_help(ct); char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; unsigned int buflen; __be32 newaddr; @@ -85,7 +86,8 @@ static int map_addr(struct sk_buff *skb, unsigned int dataoff, } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip && ct->tuplehash[dir].tuple.dst.u.udp.port == port) { newaddr = ct->tuplehash[!dir].tuple.src.u3.ip; - newport = ct->tuplehash[!dir].tuple.src.u.udp.port; + newport = help->help.ct_sip_info.forced_dport ? 
: + ct->tuplehash[!dir].tuple.src.u.udp.port; } else return 1; @@ -121,6 +123,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_conn_help *help = nfct_help(ct); unsigned int coff, matchoff, matchlen; enum sip_header_types hdr; union nf_inet_addr addr; @@ -229,6 +232,20 @@ next: !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO)) return NF_DROP; + /* Mangle destination port for Cisco phones, then fix up checksums */ + if (dir == IP_CT_DIR_REPLY && help->help.ct_sip_info.forced_dport) { + struct udphdr *uh; + + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; + + uh = (struct udphdr *)(skb->data + ip_hdrlen(skb)); + uh->dest = help->help.ct_sip_info.forced_dport; + + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, 0, 0, NULL, 0)) + return NF_DROP; + } + return NF_ACCEPT; } @@ -280,8 +297,10 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_conn_help *help = nfct_help(ct); __be32 newip; u_int16_t port; + __be16 srcport; char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; unsigned buflen; @@ -294,8 +313,9 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, /* If the signalling port matches the connection's source port in the * original direction, try to use the destination port in the opposite * direction. */ - if (exp->tuple.dst.u.udp.port == - ct->tuplehash[dir].tuple.src.u.udp.port) + srcport = help->help.ct_sip_info.forced_dport ? 
: + ct->tuplehash[dir].tuple.src.u.udp.port; + if (exp->tuple.dst.u.udp.port == srcport) port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port); else port = ntohs(exp->tuple.dst.u.udp.port); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 686934a..049f35e 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -40,3 +40,4 @@ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-y += addrconf_core.o exthdrs_core.o obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o +obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_stubs.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 498b927..6ad07b6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1106,7 +1106,7 @@ out: return ret; } -int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, +static int __ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { @@ -1231,7 +1231,6 @@ try_nextdev: in6_ifa_put(hiscore->ifa); return 0; } -EXPORT_SYMBOL(ipv6_dev_get_saddr); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, unsigned char banned_flags) @@ -4705,6 +4704,9 @@ int __init addrconf_init(void) ipv6_addr_label_rtnl_register(); + BUG_ON(ipv6_dev_get_saddr_hook != NULL); + rcu_assign_pointer(ipv6_dev_get_saddr_hook, __ipv6_dev_get_saddr); + return 0; errout: rtnl_af_unregister(&inet6_ops); @@ -4723,6 +4725,9 @@ void addrconf_cleanup(void) struct net_device *dev; int i; + rcu_assign_pointer(ipv6_dev_get_saddr_hook, NULL); + synchronize_rcu(); + unregister_netdevice_notifier(&ipv6_dev_notf); unregister_pernet_subsys(&addrconf_ops); ipv6_addr_label_cleanup(); diff --git a/net/ipv6/inet6_stubs.c b/net/ipv6/inet6_stubs.c new file mode 100644 index 0000000..5449781 --- /dev/null +++ b/net/ipv6/inet6_stubs.c @@ -0,0 +1,27 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software 
Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include + +int (*ipv6_dev_get_saddr_hook)(struct net *net, struct net_device *dev, + const struct in6_addr *daddr, unsigned int srcprefs, + struct in6_addr *saddr); + +EXPORT_SYMBOL(ipv6_dev_get_saddr_hook); + +int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, + const struct in6_addr *daddr, unsigned int prefs, + struct in6_addr *saddr) +{ + typeof(ipv6_dev_get_saddr_hook) dev_get_saddr = rcu_dereference(ipv6_dev_get_saddr_hook); + + if (dev_get_saddr) + return dev_get_saddr(net, dst_dev, daddr, prefs, saddr); + + return -EADDRNOTAVAIL; +} +EXPORT_SYMBOL(ipv6_dev_get_saddr); + diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 32bff6d..e1c6a46 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -290,6 +290,16 @@ config NF_CONNTRACK_TFTP To compile it as a module, choose M here. If unsure, say N. +config NF_CONNTRACK_RTSP + tristate "RTSP protocol support" + depends on NF_CONNTRACK + help + Support the RTSP protocol. This allows UDP transports to be setup + properly, including RTP and RDT. + + If you want to compile it as a module, say 'M' here and read + Documentation/modules.txt. If unsure, say 'Y'. + config NF_CT_NETLINK tristate 'Connection tracking netlink interface' select NETFILTER_NETLINK @@ -834,6 +844,27 @@ config NETFILTER_XT_MATCH_IPVS If unsure, say N. +config NETFILTER_XT_MATCH_LAYER7 + tristate '"layer7" match support' + depends on EXPERIMENTAL + depends on NETFILTER_XTABLES + depends on NETFILTER_ADVANCED + depends on NF_CONNTRACK + help + Say Y if you want to be able to classify connections (and their + packets) based on regular expression matching of their application + layer data. This is one way to classify applications such as + peer-to-peer filesharing systems that do not always use the same + port. + + To compile it as a module, choose M here. If unsure, say N. 
+ +config NETFILTER_XT_MATCH_LAYER7_DEBUG + bool 'Layer 7 debugging output' + depends on NETFILTER_XT_MATCH_LAYER7 + help + Say Y to get lots of debugging output. + config NETFILTER_XT_MATCH_LENGTH tristate '"length" match support' depends on NETFILTER_ADVANCED @@ -1020,6 +1051,12 @@ config NETFILTER_XT_MATCH_STATE To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_LAYER7_DEBUG + bool 'Layer 7 debugging output' + depends on NETFILTER_XT_MATCH_LAYER7 + help + Say Y to get lots of debugging output. + config NETFILTER_XT_MATCH_STATISTIC tristate '"statistic" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1a02853..38a0bf4 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o +obj-$(CONFIG_NF_CONNTRACK_RTSP) += nf_conntrack_rtsp.o # transparent proxy support obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o @@ -102,6 +103,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o +obj-$(CONFIG_NETFILTER_XT_MATCH_LAYER7) += xt_layer7.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f7af8b8..551a350 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -213,6 +213,14 @@ destroy_conntrack(struct nf_conntrack *nfct) * too. 
*/ nf_ct_remove_expectations(ct); + #if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE) + if(ct->layer7.app_proto) + kfree(ct->layer7.app_proto); + if(ct->layer7.app_data) + kfree(ct->layer7.app_data); + #endif + + /* We overload first tuple to link into unconfirmed list. */ if (!nf_ct_is_confirmed(ct)) { BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 37bf943..554a590 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -29,6 +29,9 @@ #include #include +/* Do not check the TCP window for incoming packets */ +static int nf_ct_tcp_no_window_check __read_mostly = 1; + /* "Be conservative in what you do, be liberal in what you accept from others." If it's non-zero, we mark only out of window RST segments as INVALID. */ @@ -524,6 +527,9 @@ static bool tcp_in_window(const struct nf_conn *ct, s16 receiver_offset; bool res; + if (nf_ct_tcp_no_window_check) + return true; + /* * Get the required data from the packet. 
*/ @@ -1321,6 +1327,13 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "nf_conntrack_tcp_no_window_check", + .data = &nf_ct_tcp_no_window_check, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "nf_conntrack_tcp_be_liberal", .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_conntrack_rtsp.c b/net/netfilter/nf_conntrack_rtsp.c new file mode 100644 index 0000000..84cdec5 --- /dev/null +++ b/net/netfilter/nf_conntrack_rtsp.c @@ -0,0 +1,517 @@ +/* + * RTSP extension for IP connection tracking + * (C) 2003 by Tom Marshall + * based on ip_conntrack_irc.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Module load syntax: + * insmod nf_conntrack_rtsp.o ports=port1,port2,...port + * max_outstanding=n setup_timeout=secs + * + * If no ports are specified, the default will be port 554. + * + * With max_outstanding you can define the maximum number of not yet + * answered SETUP requests per RTSP session (default 8). + * With setup_timeout you can specify how long the system waits for + * an expected data channel (default 300 seconds). 
+ * + * 2005-02-13: Harald Welte + * - port to 2.6 + * - update to recent post-2.6.11 api changes + * 2006-09-14: Steven Van Acker + * - removed calls to NAT code from conntrack helper: NAT no longer needed to use rtsp-conntrack + * 2007-04-18: Michael Guntsche + * - Port to new NF API + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define NF_NEED_STRNCASECMP +#define NF_NEED_STRTOU16 +#define NF_NEED_STRTOU32 +#define NF_NEED_NEXTLINE +#include +#define NF_NEED_MIME_NEXTLINE +#include + +#include +#define MAX_SIMUL_SETUP 8 /* XXX: use max_outstanding */ +#define INFOP(fmt, args...) printk(KERN_INFO "%s: %s: " fmt, __FILE__, __FUNCTION__ , ## args) +#if 0 +#define DEBUGP(fmt, args...) printk(KERN_DEBUG "%s: %s: " fmt, __FILE__, __FUNCTION__ , ## args) +#else +#define DEBUGP(fmt, args...) +#endif + +#define MAX_PORTS 8 +static int ports[MAX_PORTS]; +static int num_ports = 0; +static int max_outstanding = 8; +static unsigned int setup_timeout = 300; + +MODULE_AUTHOR("Tom Marshall "); +MODULE_DESCRIPTION("RTSP connection tracking module"); +MODULE_LICENSE("GPL"); +module_param_array(ports, int, &num_ports, 0400); +MODULE_PARM_DESC(ports, "port numbers of RTSP servers"); +module_param(max_outstanding, int, 0400); +MODULE_PARM_DESC(max_outstanding, "max number of outstanding SETUP requests per RTSP session"); +module_param(setup_timeout, int, 0400); +MODULE_PARM_DESC(setup_timeout, "timeout on for unestablished data channels"); + +static char *rtsp_buffer; +static DEFINE_SPINLOCK(rtsp_buffer_lock); + +unsigned int (*nf_nat_rtsp_hook)(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + unsigned int matchoff, unsigned int matchlen,struct ip_ct_rtsp_expect* prtspexp, + struct nf_conntrack_expect *exp); +void (*nf_nat_rtsp_hook_expectfn)(struct nf_conn *ct, struct nf_conntrack_expect *exp); + +EXPORT_SYMBOL_GPL(nf_nat_rtsp_hook); + +/* + * Max mappings we will allow for one RTSP connection (for RTP, the number + * 
of allocated ports is twice this value). Note that SMIL burns a lot of + * ports so keep this reasonably high. If this is too low, you will see a + * lot of "no free client map entries" messages. + */ +#define MAX_PORT_MAPS 16 + +/*** default port list was here in the masq code: 554, 3030, 4040 ***/ + +#define SKIP_WSPACE(ptr,len,off) while(off < len && isspace(*(ptr+off))) { off++; } + +/* + * Parse an RTSP packet. + * + * Returns zero if parsing failed. + * + * Parameters: + * IN ptcp tcp data pointer + * IN tcplen tcp data len + * IN/OUT ptcpoff points to current tcp offset + * OUT phdrsoff set to offset of rtsp headers + * OUT phdrslen set to length of rtsp headers + * OUT pcseqoff set to offset of CSeq header + * OUT pcseqlen set to length of CSeq header + */ +static int +rtsp_parse_message(char* ptcp, uint tcplen, uint* ptcpoff, + uint* phdrsoff, uint* phdrslen, + uint* pcseqoff, uint* pcseqlen, + uint* transoff, uint* translen) +{ + uint entitylen = 0; + uint lineoff; + uint linelen; + + if (!nf_nextline(ptcp, tcplen, ptcpoff, &lineoff, &linelen)) + return 0; + + *phdrsoff = *ptcpoff; + while (nf_mime_nextline(ptcp, tcplen, ptcpoff, &lineoff, &linelen)) { + if (linelen == 0) { + if (entitylen > 0) + *ptcpoff += min(entitylen, tcplen - *ptcpoff); + break; + } + if (lineoff+linelen > tcplen) { + INFOP("!! 
overrun !!\n"); + break; + } + + if (nf_strncasecmp(ptcp+lineoff, "CSeq:", 5) == 0) { + *pcseqoff = lineoff; + *pcseqlen = linelen; + } + + if (nf_strncasecmp(ptcp+lineoff, "Transport:", 10) == 0) { + *transoff = lineoff; + *translen = linelen; + } + + if (nf_strncasecmp(ptcp+lineoff, "Content-Length:", 15) == 0) { + uint off = 15; /* relative to lineoff: SKIP_WSPACE bounds it by linelen */ + SKIP_WSPACE(ptcp+lineoff, linelen, off); + nf_strtou32(ptcp+lineoff+off, &entitylen); + } + } + *phdrslen = (*ptcpoff) - (*phdrsoff); + + return 1; +} + +/* + * Find lo/hi client ports (if any) in transport header + * In: + * ptran = start of the "Transport:" header line + * tranlen = length of that line (must end with CR or LF) + * + * Out: + * prtspexp = loport/hiport (host endian) and pbtype filled in + * + * Returns nonzero if any client ports found + * + * Note: it is valid (and expected) for the client to request multiple + * transports, so we need to parse the entire line. + */ +static int +rtsp_parse_transport(char* ptran, uint tranlen, + struct ip_ct_rtsp_expect* prtspexp) +{ + int rc = 0; + uint off = 0; + + if (tranlen < 10 || !iseol(ptran[tranlen-1]) || + nf_strncasecmp(ptran, "Transport:", 10) != 0) { + INFOP("sanity check failed\n"); + return 0; + } + + DEBUGP("tran='%.*s'\n", (int)tranlen, ptran); + off += 10; + SKIP_WSPACE(ptran, tranlen, off); + + /* Transport: tran;field;field=val,tran;field;field=val,... */ + while (off < tranlen) { + const char* pparamend; + uint nextparamoff; + + pparamend = memchr(ptran+off, ',', tranlen-off); + pparamend = (pparamend == NULL) ? ptran+tranlen : pparamend+1; + nextparamoff = pparamend-ptran; + + while (off < nextparamoff) { + const char* pfieldend; + uint nextfieldoff; + + pfieldend = memchr(ptran+off, ';', nextparamoff-off); + nextfieldoff = (pfieldend == NULL) ?
nextparamoff : pfieldend-ptran+1; + + if (strncmp(ptran+off, "client_port=", 12) == 0) { + u_int16_t port; + uint numlen; + + off += 12; + numlen = nf_strtou16(ptran+off, &port); + off += numlen; + if (prtspexp->loport != 0 && prtspexp->loport != port) + DEBUGP("multiple ports found, port %hu ignored\n", port); + else { + DEBUGP("lo port found : %hu\n", port); + prtspexp->loport = prtspexp->hiport = port; + if (ptran[off] == '-') { + off++; + numlen = nf_strtou16(ptran+off, &port); + off += numlen; + prtspexp->pbtype = pb_range; + prtspexp->hiport = port; + + // If we have a range, assume rtp: + // loport must be even, hiport must be loport+1 + if ((prtspexp->loport & 0x0001) != 0 || + prtspexp->hiport != prtspexp->loport+1) { + DEBUGP("incorrect range: %hu-%hu, correcting\n", + prtspexp->loport, prtspexp->hiport); + prtspexp->loport &= 0xfffe; + prtspexp->hiport = prtspexp->loport+1; + } + } else if (ptran[off] == '/') { + off++; + numlen = nf_strtou16(ptran+off, &port); + off += numlen; + prtspexp->pbtype = pb_discon; + prtspexp->hiport = port; + } + rc = 1; + } + } + + /* + * Note we don't look for the destination parameter here. + * If we are using NAT, the NAT module will handle it. If not, + * and the client is sending packets elsewhere, the expectation + * will quietly time out. 
+ */ + + off = nextfieldoff; + } + + off = nextparamoff; + } + + return rc; +} + +void expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) +{ + if(nf_nat_rtsp_hook_expectfn) { + nf_nat_rtsp_hook_expectfn(ct,exp); + } +} + +/*** conntrack functions ***/ + +/* outbound packet: client->server */ + +static inline int +help_out(struct sk_buff *skb, unsigned char *rb_ptr, unsigned int datalen, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + struct ip_ct_rtsp_expect expinfo; + + int dir = CTINFO2DIR(ctinfo); /* = IP_CT_DIR_ORIGINAL */ + //struct tcphdr* tcph = (void*)iph + iph->ihl * 4; + //uint tcplen = pktlen - iph->ihl * 4; + char* pdata = rb_ptr; + //uint datalen = tcplen - tcph->doff * 4; + uint dataoff = 0; + int ret = NF_ACCEPT; + + struct nf_conntrack_expect *exp; + + __be16 be_loport; + + memset(&expinfo, 0, sizeof(expinfo)); + + while (dataoff < datalen) { + uint cmdoff = dataoff; + uint hdrsoff = 0; + uint hdrslen = 0; + uint cseqoff = 0; + uint cseqlen = 0; + uint transoff = 0; + uint translen = 0; + uint off; + + if (!rtsp_parse_message(pdata, datalen, &dataoff, + &hdrsoff, &hdrslen, + &cseqoff, &cseqlen, + &transoff, &translen)) + break; /* not a valid message */ + + if (strncmp(pdata+cmdoff, "SETUP ", 6) != 0) + continue; /* not a SETUP message */ + DEBUGP("found a setup message\n"); + + off = 0; + if(translen) { + rtsp_parse_transport(pdata+transoff, translen, &expinfo); + } + + if (expinfo.loport == 0) { + DEBUGP("no udp transports found\n"); + continue; /* no udp transports found */ + } + + DEBUGP("udp transport found, ports=(%d,%hu,%hu)\n", + (int)expinfo.pbtype, expinfo.loport, expinfo.hiport); + + exp = nf_ct_expect_alloc(ct); + if (!exp) { + ret = NF_DROP; + goto out; + } + + be_loport = htons(expinfo.loport); + + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, + ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_UDP, NULL, &be_loport); + + exp->master = ct; + 
+ exp->expectfn = expected; + exp->flags = 0; + + if (expinfo.pbtype == pb_range) { + DEBUGP("Changing expectation mask to handle multiple ports\n"); + exp->mask.src.u.udp.port = 0xfffe; + } + + DEBUGP("expect_related %u.%u.%u.%u:%u-%u.%u.%u.%u:%u\n", + NIPQUAD(exp->tuple.src.u3.ip), + ntohs(exp->tuple.src.u.udp.port), + NIPQUAD(exp->tuple.dst.u3.ip), + ntohs(exp->tuple.dst.u.udp.port)); + + if (nf_nat_rtsp_hook) + /* pass the request off to the nat helper */ + ret = nf_nat_rtsp_hook(skb, ctinfo, hdrsoff, hdrslen, &expinfo, exp); + else if (nf_ct_expect_related(exp) != 0) { + INFOP("nf_ct_expect_related failed\n"); + ret = NF_DROP; + } + nf_ct_expect_put(exp); + goto out; + } +out: + + return ret; +} + + +static inline int +help_in(struct sk_buff *skb, size_t pktlen, + struct nf_conn* ct, enum ip_conntrack_info ctinfo) +{ + return NF_ACCEPT; +} + +static int help(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + struct tcphdr _tcph, *th; + unsigned int dataoff, datalen; + char *rb_ptr; + int ret = NF_DROP; + + /* Until there's been traffic both ways, don't look in packets. */ + if (ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + DEBUGP("conntrackinfo = %u\n", ctinfo); + return NF_ACCEPT; + } + + /* Not whole TCP header? */ + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + + if (!th) + return NF_ACCEPT; + + /* No data ? */ + dataoff = protoff + th->doff*4; + datalen = skb->len - dataoff; + if (dataoff >= skb->len) + return NF_ACCEPT; + + spin_lock_bh(&rtsp_buffer_lock); + rb_ptr = skb_header_pointer(skb, dataoff, + skb->len - dataoff, rtsp_buffer); + BUG_ON(rb_ptr == NULL); + +#if 0 + /* Checksum invalid? Ignore. 
*/ + /* FIXME: Source route IP option packets --RR */ + if (tcp_v4_check(tcph, tcplen, iph->saddr, iph->daddr, + csum_partial((char*)tcph, tcplen, 0))) + { + DEBUGP("bad csum: %p %u %u.%u.%u.%u %u.%u.%u.%u\n", + tcph, tcplen, NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); + return NF_ACCEPT; + } +#endif + + switch (CTINFO2DIR(ctinfo)) { + case IP_CT_DIR_ORIGINAL: + ret = help_out(skb, rb_ptr, datalen, ct, ctinfo); + break; + case IP_CT_DIR_REPLY: + DEBUGP("IP_CT_DIR_REPLY\n"); + /* inbound packet: server->client */ + ret = NF_ACCEPT; + break; + } + + spin_unlock_bh(&rtsp_buffer_lock); + + return ret; +} + +static struct nf_conntrack_helper rtsp_helpers[MAX_PORTS]; +static char rtsp_names[MAX_PORTS][10]; +static struct nf_conntrack_expect_policy rtsp_expect_policy; + +/* This function is intentionally _NOT_ defined as __exit */ +static void +fini(void) +{ + int i; + for (i = 0; i < num_ports; i++) { + DEBUGP("unregistering port %d\n", ports[i]); + nf_conntrack_helper_unregister(&rtsp_helpers[i]); + } + kfree(rtsp_buffer); +} + +static int __init +init(void) +{ + int i, ret; + struct nf_conntrack_helper *hlpr; + char *tmpname; + + printk("nf_conntrack_rtsp v" IP_NF_RTSP_VERSION " loading\n"); + + if (max_outstanding < 1) { + printk("nf_conntrack_rtsp: max_outstanding must be a positive integer\n"); + return -EBUSY; + } + if (setup_timeout < 0) { + printk("nf_conntrack_rtsp: setup_timeout must be a positive integer\n"); + return -EBUSY; + } + + rtsp_expect_policy.max_expected = max_outstanding; + rtsp_expect_policy.timeout = setup_timeout; + + rtsp_buffer = kmalloc(65536, GFP_KERNEL); + if (!rtsp_buffer) + return -ENOMEM; + + /* If no port given, default to standard rtsp port */ + if (ports[0] == 0) { + ports[0] = RTSP_PORT; + } + + for (i = 0; (i < MAX_PORTS) && ports[i]; i++) { + hlpr = &rtsp_helpers[i]; + memset(hlpr, 0, sizeof(struct nf_conntrack_helper)); + hlpr->tuple.src.u.tcp.port = htons(ports[i]); + hlpr->tuple.dst.protonum = IPPROTO_TCP; + hlpr->expect_policy = 
&rtsp_expect_policy; + hlpr->me = THIS_MODULE; + hlpr->help = help; + + tmpname = &rtsp_names[i][0]; + if (ports[i] == RTSP_PORT) { + sprintf(tmpname, "rtsp"); + } else { + sprintf(tmpname, "rtsp-%d", i); + } + hlpr->name = tmpname; + + DEBUGP("port #%d: %d\n", i, ports[i]); + + ret = nf_conntrack_helper_register(hlpr); + + if (ret) { + printk("nf_conntrack_rtsp: ERROR registering port %d\n", ports[i]); + fini(); + return -EBUSY; + } + num_ports++; + } + return 0; +} + +module_init(init); +module_exit(fini); + +EXPORT_SYMBOL(nf_nat_rtsp_hook_expectfn); + diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 93faf6a..6a9c9aa 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1363,8 +1363,25 @@ static int process_sip_request(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned int matchoff, matchlen; unsigned int cseq, i; + union nf_inet_addr addr; + __be16 port; + + /* Many Cisco IP phones use a high source port for SIP requests, but + * listen for the response on port 5060. If we are the local + * router for one of these phones, save the port number from the + * Via: header so that nf_nat_sip can redirect the responses to + * the correct port. 
+ */ + if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, + SIP_HDR_VIA_UDP, NULL, &matchoff, + &matchlen, &addr, &port) > 0 && + port != ct->tuplehash[dir].tuple.src.u.udp.port && + nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3)) + help->help.ct_sip_info.forced_dport = port; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { const struct sip_handler *handler; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 05e9feb..621e2da 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -239,6 +239,12 @@ static int ct_seq_show(struct seq_file *s, void *v) if (ct_show_delta_time(s, ct)) goto release; +#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE) + if(ct->layer7.app_proto && + seq_printf(s, "l7proto=%s ", ct->layer7.app_proto)) + goto release; /* leave through the shared exit like the checks around it; a bare return would skip whatever cleanup release: performs */ +#endif + if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) goto release; diff --git a/net/netfilter/regexp/regexp.c b/net/netfilter/regexp/regexp.c new file mode 100644 index 0000000..9006988 --- /dev/null +++ b/net/netfilter/regexp/regexp.c @@ -0,0 +1,1197 @@ +/* + * regcomp and regexec -- regsub and regerror are elsewhere + * @(#)regexp.c 1.3 of 18 April 87 + * + * Copyright (c) 1986 by University of Toronto. + * Written by Henry Spencer. Not derived from licensed software. + * + * Permission is granted to anyone to use this software for any + * purpose on any computer system, and to redistribute it freely, + * subject to the following restrictions: + * + * 1. The author is not responsible for the consequences of use of + * this software, no matter how awful, even if they arise + * from defects in it. + * + * 2. The origin of this software must not be misrepresented, either + * by explicit claim or by omission. + * + * 3. Altered versions must be plainly marked as such, and must not + * be misrepresented as being the original software.
+ * + * Beware that some of this code is subtly aware of the way operator + * precedence is structured in regular expressions. Serious changes in + * regular-expression syntax might require a total rethink. + * + * This code was modified by Ethan Sommer to work within the kernel + * (it now uses kmalloc etc..) + * + * Modified slightly by Matthew Strait to use more modern C. + */ + +#include "regexp.h" +#include "regmagic.h" + +/* added by ethan and matt. Lets it work in both kernel and user space. +(So iptables can use it, for instance.) Yea, it goes both ways... */ +#if __KERNEL__ + #define malloc(foo) kmalloc(foo,GFP_ATOMIC) +#else + #define printk(format,args...) printf(format,##args) +#endif + +void regerror(char * s) +{ + printk("<3>Regexp: %s\n", s); + /* NOTREACHED */ +} + +/* + * The "internal use only" fields in regexp.h are present to pass info from + * compile to execute that permits the execute phase to run lots faster on + * simple cases. They are: + * + * regstart char that must begin a match; '\0' if none obvious + * reganch is the match anchored (at beginning-of-line only)? + * regmust string (pointer into program) that match must include, or NULL + * regmlen length of regmust string + * + * Regstart and reganch permit very fast decisions on suitable starting points + * for a match, cutting down the work a lot. Regmust permits fast rejection + * of lines that cannot possibly match. The regmust tests are costly enough + * that regcomp() supplies a regmust only if the r.e. contains something + * potentially expensive (at present, the only such thing detected is * or + + * at the start of the r.e., which can involve a lot of backup). Regmlen is + * supplied because the test in regexec() needs it and regcomp() is computing + * it anyway. + */ + +/* + * Structure for regexp "program". This is essentially a linear encoding + * of a nondeterministic finite-state machine (aka syntax charts or + * "railroad normal form" in parsing technology). 
Each node is an opcode + * plus a "next" pointer, possibly plus an operand. "Next" pointers of + * all nodes except BRANCH implement concatenation; a "next" pointer with + * a BRANCH on both ends of it is connecting two alternatives. (Here we + * have one of the subtle syntax dependencies: an individual BRANCH (as + * opposed to a collection of them) is never concatenated with anything + * because of operator precedence.) The operand of some types of node is + * a literal string; for others, it is a node leading into a sub-FSM. In + * particular, the operand of a BRANCH node is the first node of the branch. + * (NB this is *not* a tree structure: the tail of the branch connects + * to the thing following the set of BRANCHes.) The opcodes are: + */ + +/* definition number opnd? meaning */ +#define END 0 /* no End of program. */ +#define BOL 1 /* no Match "" at beginning of line. */ +#define EOL 2 /* no Match "" at end of line. */ +#define ANY 3 /* no Match any one character. */ +#define ANYOF 4 /* str Match any character in this string. */ +#define ANYBUT 5 /* str Match any character not in this string. */ +#define BRANCH 6 /* node Match this alternative, or the next... */ +#define BACK 7 /* no Match "", "next" ptr points backward. */ +#define EXACTLY 8 /* str Match this string. */ +#define NOTHING 9 /* no Match empty string. */ +#define STAR 10 /* node Match this (simple) thing 0 or more times. */ +#define PLUS 11 /* node Match this (simple) thing 1 or more times. */ +#define OPEN 20 /* no Mark this point in input as start of #n. */ + /* OPEN+1 is number 1, etc. */ +#define CLOSE 30 /* no Analogous to OPEN. */ + +/* + * Opcode notes: + * + * BRANCH The set of branches constituting a single choice are hooked + * together with their "next" pointers, since precedence prevents + * anything being concatenated to any individual branch. The + * "next" pointer of the last BRANCH in a choice points to the + * thing following the whole choice. 
This is also where the + * final "next" pointer of each individual branch points; each + * branch starts with the operand node of a BRANCH node. + * + * BACK Normal "next" pointers all implicitly point forward; BACK + * exists to make loop structures possible. + * + * STAR,PLUS '?', and complex '*' and '+', are implemented as circular + * BRANCH structures using BACK. Simple cases (one character + * per match) are implemented with STAR and PLUS for speed + * and to minimize recursive plunges. + * + * OPEN,CLOSE ...are numbered at compile time. + */ + +/* + * A node is one char of opcode followed by two chars of "next" pointer. + * "Next" pointers are stored as two 8-bit pieces, high order first. The + * value is a positive offset from the opcode of the node containing it. + * An operand, if any, simply follows the node. (Note that much of the + * code generation knows about this implicit relationship.) + * + * Using two bytes for the "next" pointer is vast overkill for most things, + * but allows patterns to get big without disasters. + */ +#define OP(p) (*(p)) +#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) +#define OPERAND(p) ((p) + 3) + +/* + * See regmagic.h for one further detail of program structure. + */ + + +/* + * Utility definitions. + */ +#ifndef CHARBITS +#define UCHARAT(p) ((int)*(unsigned char *)(p)) +#else +#define UCHARAT(p) ((int)*(p)&CHARBITS) +#endif + +#define FAIL(m) { regerror(m); return(NULL); } +#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') +#define META "^$.[()|?+*\\" + +/* + * Flags to be passed up and down. + */ +#define HASWIDTH 01 /* Known never to match null string. */ +#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ +#define SPSTART 04 /* Starts with * or +. */ +#define WORST 0 /* Worst case. */ + +/* + * Global work variables for regcomp(). + */ +struct match_globals { +char *reginput; /* String-input pointer. */ +char *regbol; /* Beginning of input, for ^ check. 
*/ +char **regstartp; /* Pointer to startp array. */ +char **regendp; /* Ditto for endp. */ +char *regparse; /* Input-scan pointer. */ +int regnpar; /* () count. */ +char regdummy; +char *regcode; /* Code-emit pointer; &regdummy = don't. */ +long regsize; /* Code size. */ +}; + +/* + * Forward declarations for regcomp()'s friends. + */ +#ifndef STATIC +#define STATIC static +#endif +STATIC char *reg(struct match_globals *g, int paren,int *flagp); +STATIC char *regbranch(struct match_globals *g, int *flagp); +STATIC char *regpiece(struct match_globals *g, int *flagp); +STATIC char *regatom(struct match_globals *g, int *flagp); +STATIC char *regnode(struct match_globals *g, char op); +STATIC char *regnext(struct match_globals *g, char *p); +STATIC void regc(struct match_globals *g, char b); +STATIC void reginsert(struct match_globals *g, char op, char *opnd); +STATIC void regtail(struct match_globals *g, char *p, char *val); +STATIC void regoptail(struct match_globals *g, char *p, char *val); + + +__kernel_size_t my_strcspn(const char *s1,const char *s2) +{ + char *scan1; + char *scan2; + int count; + + count = 0; + for (scan1 = (char *)s1; *scan1 != '\0'; scan1++) { + for (scan2 = (char *)s2; *scan2 != '\0';) /* ++ moved down. */ + if (*scan1 == *scan2++) + return(count); + count++; + } + return(count); +} + +/* + - regcomp - compile a regular expression into internal code + * + * We can't allocate space until we know how big the compiled form will be, + * but we can't compile it (and thus know how big it is) until we've got a + * place to put the code. So we cheat: we compile it twice, once with code + * generation turned off and size counting turned on, and once "for real". + * This also means that we don't allocate space until we are sure that the + * thing really will compile successfully, and we never have to move the + * code and thus invalidate pointers into it. (Note that it has to be in + * one piece because free() must be able to free it all.)
+ * + * Beware that the optimization-preparation code in here knows about some + * of the structure of the compiled regexp. + */ +regexp * +regcomp(char *exp,int *patternsize) +{ + register regexp *r; + register char *scan; + register char *longest; + register int len; + int flags; + struct match_globals g; + + /* commented out by ethan + extern char *malloc(); + */ + + if (exp == NULL) + FAIL("NULL argument"); + + /* First pass: determine size, legality. */ + g.regparse = exp; + g.regnpar = 1; + g.regsize = 0L; + g.regcode = &g.regdummy; + regc(&g, MAGIC); + if (reg(&g, 0, &flags) == NULL) + return(NULL); + + /* Small enough for pointer-storage convention? */ + if (g.regsize >= 32767L) /* Probably could be 65535L. */ + FAIL("regexp too big"); + + /* Allocate space. */ + *patternsize=sizeof(regexp) + (unsigned)g.regsize; + r = (regexp *)malloc(sizeof(regexp) + (unsigned)g.regsize); + if (r == NULL) + FAIL("out of space"); + + /* Second pass: emit code. */ + g.regparse = exp; + g.regnpar = 1; + g.regcode = r->program; + regc(&g, MAGIC); + if (reg(&g, 0, &flags) == NULL) + return(NULL); + + /* Dig out information for optimizations. */ + r->regstart = '\0'; /* Worst-case defaults. */ + r->reganch = 0; + r->regmust = NULL; + r->regmlen = 0; + scan = r->program+1; /* First BRANCH. */ + if (OP(regnext(&g, scan)) == END) { /* Only one top-level choice. */ + scan = OPERAND(scan); + + /* Starting-point info. */ + if (OP(scan) == EXACTLY) + r->regstart = *OPERAND(scan); + else if (OP(scan) == BOL) + r->reganch++; + + /* + * If there's something expensive in the r.e., find the + * longest literal string that must appear and make it the + * regmust. Resolve ties in favor of later strings, since + * the regstart check works with the beginning of the r.e. + * and avoiding duplication strengthens checking. Not a + * strong reason, but sufficient in the absence of others. 
+ */ + if (flags&SPSTART) { + longest = NULL; + len = 0; + for (; scan != NULL; scan = regnext(&g, scan)) + if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { + longest = OPERAND(scan); + len = strlen(OPERAND(scan)); + } + r->regmust = longest; + r->regmlen = len; + } + } + + return(r); +} + +/* + - reg - regular expression, i.e. main body or parenthesized thing + * + * Caller must absorb opening parenthesis. + * + * Combining parenthesis handling with the base level of regular expression + * is a trifle forced, but the need to tie the tails of the branches to what + * follows makes it hard to avoid. + */ +static char * +reg(struct match_globals *g, int paren, int *flagp /* Parenthesized? */ ) +{ + register char *ret; + register char *br; + register char *ender; + register int parno = 0; /* 0 makes gcc happy */ + int flags; + + *flagp = HASWIDTH; /* Tentatively. */ + + /* Make an OPEN node, if parenthesized. */ + if (paren) { + if (g->regnpar >= NSUBEXP) + FAIL("too many ()"); + parno = g->regnpar; + g->regnpar++; + ret = regnode(g, OPEN+parno); + } else + ret = NULL; + + /* Pick up the branches, linking them together. */ + br = regbranch(g, &flags); + if (br == NULL) + return(NULL); + if (ret != NULL) + regtail(g, ret, br); /* OPEN -> first. */ + else + ret = br; + if (!(flags&HASWIDTH)) + *flagp &= ~HASWIDTH; + *flagp |= flags&SPSTART; + while (*g->regparse == '|') { + g->regparse++; + br = regbranch(g, &flags); + if (br == NULL) + return(NULL); + regtail(g, ret, br); /* BRANCH -> BRANCH. */ + if (!(flags&HASWIDTH)) + *flagp &= ~HASWIDTH; + *flagp |= flags&SPSTART; + } + + /* Make a closing node, and hook it on the end. */ + ender = regnode(g, (paren) ? CLOSE+parno : END); + regtail(g, ret, ender); + + /* Hook the tails of the branches to the closing node. */ + for (br = ret; br != NULL; br = regnext(g, br)) + regoptail(g, br, ender); + + /* Check for proper termination. 
*/ + if (paren && *g->regparse++ != ')') { + FAIL("unmatched ()"); + } else if (!paren && *g->regparse != '\0') { + if (*g->regparse == ')') { + FAIL("unmatched ()"); + } else + FAIL("junk on end"); /* "Can't happen". */ + /* NOTREACHED */ + } + + return(ret); +} + +/* + - regbranch - one alternative of an | operator + * + * Implements the concatenation operator. + */ +static char * +regbranch(struct match_globals *g, int *flagp) +{ + register char *ret; + register char *chain; + register char *latest; + int flags; + + *flagp = WORST; /* Tentatively. */ + + ret = regnode(g, BRANCH); + chain = NULL; + while (*g->regparse != '\0' && *g->regparse != '|' && *g->regparse != ')') { + latest = regpiece(g, &flags); + if (latest == NULL) + return(NULL); + *flagp |= flags&HASWIDTH; + if (chain == NULL) /* First piece. */ + *flagp |= flags&SPSTART; + else + regtail(g, chain, latest); + chain = latest; + } + if (chain == NULL) /* Loop ran zero times. */ + (void) regnode(g, NOTHING); + + return(ret); +} + +/* + - regpiece - something followed by possible [*+?] + * + * Note that the branching code sequences used for ? and the general cases + * of * and + are somewhat optimized: they use the same NOTHING node as + * both the endmarker for their branch list and the body of the last branch. + * It might seem that this node could be dispensed with entirely, but the + * endmarker role is not redundant. + */ +static char * +regpiece(struct match_globals *g, int *flagp) +{ + register char *ret; + register char op; + register char *next; + int flags; + + ret = regatom(g, &flags); + if (ret == NULL) + return(NULL); + + op = *g->regparse; + if (!ISMULT(op)) { + *flagp = flags; + return(ret); + } + + if (!(flags&HASWIDTH) && op != '?') + FAIL("*+ operand could be empty"); + *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); + + if (op == '*' && (flags&SIMPLE)) + reginsert(g, STAR, ret); + else if (op == '*') { + /* Emit x* as (x&|), where & means "self". 
*/ + reginsert(g, BRANCH, ret); /* Either x */ + regoptail(g, ret, regnode(g, BACK)); /* and loop */ + regoptail(g, ret, ret); /* back */ + regtail(g, ret, regnode(g, BRANCH)); /* or */ + regtail(g, ret, regnode(g, NOTHING)); /* null. */ + } else if (op == '+' && (flags&SIMPLE)) + reginsert(g, PLUS, ret); + else if (op == '+') { + /* Emit x+ as x(&|), where & means "self". */ + next = regnode(g, BRANCH); /* Either */ + regtail(g, ret, next); + regtail(g, regnode(g, BACK), ret); /* loop back */ + regtail(g, next, regnode(g, BRANCH)); /* or */ + regtail(g, ret, regnode(g, NOTHING)); /* null. */ + } else if (op == '?') { + /* Emit x? as (x|) */ + reginsert(g, BRANCH, ret); /* Either x */ + regtail(g, ret, regnode(g, BRANCH)); /* or */ + next = regnode(g, NOTHING); /* null. */ + regtail(g, ret, next); + regoptail(g, ret, next); + } + g->regparse++; + if (ISMULT(*g->regparse)) + FAIL("nested *?+"); + + return(ret); +} + +/* + - regatom - the lowest level + * + * Optimization: gobbles an entire sequence of ordinary characters so that + * it can turn them into a single node, which is smaller to store and + * faster to run. Backslashed characters are exceptions, each becoming a + * separate node; the code is simpler that way and it's not worth fixing. + */ +static char * +regatom(struct match_globals *g, int *flagp) +{ + register char *ret; + int flags; + + *flagp = WORST; /* Tentatively. */ + + switch (*g->regparse++) { + case '^': + ret = regnode(g, BOL); + break; + case '$': + ret = regnode(g, EOL); + break; + case '.': + ret = regnode(g, ANY); + *flagp |= HASWIDTH|SIMPLE; + break; + case '[': { + register int class; + register int classend; + + if (*g->regparse == '^') { /* Complement of range. 
*/ + ret = regnode(g, ANYBUT); + g->regparse++; + } else + ret = regnode(g, ANYOF); + if (*g->regparse == ']' || *g->regparse == '-') + regc(g, *g->regparse++); + while (*g->regparse != '\0' && *g->regparse != ']') { + if (*g->regparse == '-') { + g->regparse++; + if (*g->regparse == ']' || *g->regparse == '\0') + regc(g, '-'); + else { + class = UCHARAT(g->regparse-2)+1; + classend = UCHARAT(g->regparse); + if (class > classend+1) + FAIL("invalid [] range"); + for (; class <= classend; class++) + regc(g, class); + g->regparse++; + } + } else + regc(g, *g->regparse++); + } + regc(g, '\0'); + if (*g->regparse != ']') + FAIL("unmatched []"); + g->regparse++; + *flagp |= HASWIDTH|SIMPLE; + } + break; + case '(': + ret = reg(g, 1, &flags); + if (ret == NULL) + return(NULL); + *flagp |= flags&(HASWIDTH|SPSTART); + break; + case '\0': + case '|': + case ')': + FAIL("internal urp"); /* Supposed to be caught earlier. */ + break; + case '?': + case '+': + case '*': + FAIL("?+* follows nothing"); + break; + case '\\': + if (*g->regparse == '\0') + FAIL("trailing \\"); + ret = regnode(g, EXACTLY); + regc(g, *g->regparse++); + regc(g, '\0'); + *flagp |= HASWIDTH|SIMPLE; + break; + default: { + register int len; + register char ender; + + g->regparse--; + len = my_strcspn((const char *)g->regparse, (const char *)META); + if (len <= 0) + FAIL("internal disaster"); + ender = *(g->regparse+len); + if (len > 1 && ISMULT(ender)) + len--; /* Back off clear of ?+* operand. */ + *flagp |= HASWIDTH; + if (len == 1) + *flagp |= SIMPLE; + ret = regnode(g, EXACTLY); + while (len > 0) { + regc(g, *g->regparse++); + len--; + } + regc(g, '\0'); + } + break; + } + + return(ret); +} + +/* + - regnode - emit a node + */ +static char * /* Location. */ +regnode(struct match_globals *g, char op) +{ + register char *ret; + register char *ptr; + + ret = g->regcode; + if (ret == &g->regdummy) { + g->regsize += 3; + return(ret); + } + + ptr = ret; + *ptr++ = op; + *ptr++ = '\0'; /* Null "next" pointer. 
*/ + *ptr++ = '\0'; + g->regcode = ptr; + + return(ret); +} + +/* + - regc - emit (if appropriate) a byte of code + */ +static void +regc(struct match_globals *g, char b) +{ + if (g->regcode != &g->regdummy) + *g->regcode++ = b; + else + g->regsize++; +} + +/* + - reginsert - insert an operator in front of already-emitted operand + * + * Means relocating the operand. + */ +static void +reginsert(struct match_globals *g, char op, char* opnd) +{ + register char *src; + register char *dst; + register char *place; + + if (g->regcode == &g->regdummy) { + g->regsize += 3; + return; + } + + src = g->regcode; + g->regcode += 3; + dst = g->regcode; + while (src > opnd) + *--dst = *--src; + + place = opnd; /* Op node, where operand used to be. */ + *place++ = op; + *place++ = '\0'; + *place++ = '\0'; +} + +/* + - regtail - set the next-pointer at the end of a node chain + */ +static void +regtail(struct match_globals *g, char *p, char *val) +{ + register char *scan; + register char *temp; + register int offset; + + if (p == &g->regdummy) + return; + + /* Find last node. */ + scan = p; + for (;;) { + temp = regnext(g, scan); + if (temp == NULL) + break; + scan = temp; + } + + if (OP(scan) == BACK) + offset = scan - val; + else + offset = val - scan; + *(scan+1) = (offset>>8)&0377; + *(scan+2) = offset&0377; +} + +/* + - regoptail - regtail on operand of first argument; nop if operandless + */ +static void +regoptail(struct match_globals *g, char *p, char *val) +{ + /* "Operandless" and "op != BRANCH" are synonymous in practice. */ + if (p == NULL || p == &g->regdummy || OP(p) != BRANCH) + return; + regtail(g, OPERAND(p), val); +} + +/* + * regexec and friends + */ + + +/* + * Forwards. 
+ */ +STATIC int regtry(struct match_globals *g, regexp *prog, char *string); +STATIC int regmatch(struct match_globals *g, char *prog); +STATIC int regrepeat(struct match_globals *g, char *p); + +#ifdef DEBUG +int regnarrate = 0; +void regdump(); +STATIC char *regprop(char *op); +#endif + +/* + - regexec - match a regexp against a string + */ +int +regexec(regexp *prog, char *string) +{ + register char *s; + struct match_globals g; + + /* Be paranoid... */ + if (prog == NULL || string == NULL) { + printk("<3>Regexp: NULL parameter\n"); + return(0); + } + + /* Check validity of program. */ + if (UCHARAT(prog->program) != MAGIC) { + printk("<3>Regexp: corrupted program\n"); + return(0); + } + + /* If there is a "must appear" string, look for it. */ + if (prog->regmust != NULL) { + s = string; + while ((s = strchr(s, prog->regmust[0])) != NULL) { + if (strncmp(s, prog->regmust, prog->regmlen) == 0) + break; /* Found it. */ + s++; + } + if (s == NULL) /* Not present. */ + return(0); + } + + /* Mark beginning of line for ^ . */ + g.regbol = string; + + /* Simplest case: anchored match need be tried only once. */ + if (prog->reganch) + return(regtry(&g, prog, string)); + + /* Messy cases: unanchored match. */ + s = string; + if (prog->regstart != '\0') + /* We know what char it must start with. */ + while ((s = strchr(s, prog->regstart)) != NULL) { + if (regtry(&g, prog, s)) + return(1); + s++; + } + else + /* We don't -- general case. */ + do { + if (regtry(&g, prog, s)) + return(1); + } while (*s++ != '\0'); + + /* Failure. 
*/ + return(0); +} + +/* + - regtry - try match at specific point + */ +static int /* 0 failure, 1 success */ +regtry(struct match_globals *g, regexp *prog, char *string) +{ + register int i; + register char **sp; + register char **ep; + + g->reginput = string; + g->regstartp = prog->startp; + g->regendp = prog->endp; + + sp = prog->startp; + ep = prog->endp; + for (i = NSUBEXP; i > 0; i--) { + *sp++ = NULL; + *ep++ = NULL; + } + if (regmatch(g, prog->program + 1)) { + prog->startp[0] = string; + prog->endp[0] = g->reginput; + return(1); + } else + return(0); +} + +/* + - regmatch - main matching routine + * + * Conceptually the strategy is simple: check to see whether the current + * node matches, call self recursively to see whether the rest matches, + * and then act accordingly. In practice we make some effort to avoid + * recursion, in particular by going through "ordinary" nodes (that don't + * need to know whether the rest of the match failed) by a loop instead of + * by recursion. + */ +static int /* 0 failure, 1 success */ +regmatch(struct match_globals *g, char *prog) +{ + register char *scan = prog; /* Current node. */ + char *next; /* Next node. */ + +#ifdef DEBUG + if (scan != NULL && regnarrate) + fprintf(stderr, "%s(\n", regprop(scan)); +#endif + while (scan != NULL) { +#ifdef DEBUG + if (regnarrate) + fprintf(stderr, "%s...\n", regprop(scan)); +#endif + next = regnext(g, scan); + + switch (OP(scan)) { + case BOL: + if (g->reginput != g->regbol) + return(0); + break; + case EOL: + if (*g->reginput != '\0') + return(0); + break; + case ANY: + if (*g->reginput == '\0') + return(0); + g->reginput++; + break; + case EXACTLY: { + register int len; + register char *opnd; + + opnd = OPERAND(scan); + /* Inline the first character, for speed. 
*/ + if (*opnd != *g->reginput) + return(0); + len = strlen(opnd); + if (len > 1 && strncmp(opnd, g->reginput, len) != 0) + return(0); + g->reginput += len; + } + break; + case ANYOF: + if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) == NULL) + return(0); + g->reginput++; + break; + case ANYBUT: + if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) != NULL) + return(0); + g->reginput++; + break; + case NOTHING: + case BACK: + break; + case OPEN+1: + case OPEN+2: + case OPEN+3: + case OPEN+4: + case OPEN+5: + case OPEN+6: + case OPEN+7: + case OPEN+8: + case OPEN+9: { + register int no; + register char *save; + + no = OP(scan) - OPEN; + save = g->reginput; + + if (regmatch(g, next)) { + /* + * Don't set startp if some later + * invocation of the same parentheses + * already has. + */ + if (g->regstartp[no] == NULL) + g->regstartp[no] = save; + return(1); + } else + return(0); + } + break; + case CLOSE+1: + case CLOSE+2: + case CLOSE+3: + case CLOSE+4: + case CLOSE+5: + case CLOSE+6: + case CLOSE+7: + case CLOSE+8: + case CLOSE+9: + { + register int no; + register char *save; + + no = OP(scan) - CLOSE; + save = g->reginput; + + if (regmatch(g, next)) { + /* + * Don't set endp if some later + * invocation of the same parentheses + * already has. + */ + if (g->regendp[no] == NULL) + g->regendp[no] = save; + return(1); + } else + return(0); + } + break; + case BRANCH: { + register char *save; + + if (OP(next) != BRANCH) /* No choice. */ + next = OPERAND(scan); /* Avoid recursion. */ + else { + do { + save = g->reginput; + if (regmatch(g, OPERAND(scan))) + return(1); + g->reginput = save; + scan = regnext(g, scan); + } while (scan != NULL && OP(scan) == BRANCH); + return(0); + /* NOTREACHED */ + } + } + break; + case STAR: + case PLUS: { + register char nextch; + register int no; + register char *save; + register int min; + + /* + * Lookahead to avoid useless match attempts + * when we know what character comes next. 
+ */ + nextch = '\0'; + if (OP(next) == EXACTLY) + nextch = *OPERAND(next); + min = (OP(scan) == STAR) ? 0 : 1; + save = g->reginput; + no = regrepeat(g, OPERAND(scan)); + while (no >= min) { + /* If it could work, try it. */ + if (nextch == '\0' || *g->reginput == nextch) + if (regmatch(g, next)) + return(1); + /* Couldn't or didn't -- back up. */ + no--; + g->reginput = save + no; + } + return(0); + } + break; + case END: + return(1); /* Success! */ + break; + default: + printk("<3>Regexp: memory corruption\n"); + return(0); + break; + } + + scan = next; + } + + /* + * We get here only if there's trouble -- normally "case END" is + * the terminating point. + */ + printk("<3>Regexp: corrupted pointers\n"); + return(0); +} + +/* + - regrepeat - repeatedly match something simple, report how many + */ +static int +regrepeat(struct match_globals *g, char *p) +{ + register int count = 0; + register char *scan; + register char *opnd; + + scan = g->reginput; + opnd = OPERAND(p); + switch (OP(p)) { + case ANY: + count = strlen(scan); + scan += count; + break; + case EXACTLY: + while (*opnd == *scan) { + count++; + scan++; + } + break; + case ANYOF: + while (*scan != '\0' && strchr(opnd, *scan) != NULL) { + count++; + scan++; + } + break; + case ANYBUT: + while (*scan != '\0' && strchr(opnd, *scan) == NULL) { + count++; + scan++; + } + break; + default: /* Oh dear. Called inappropriately. */ + printk("<3>Regexp: internal foulup\n"); + count = 0; /* Best compromise. 
*/ + break; + } + g->reginput = scan; + + return(count); +} + +/* + - regnext - dig the "next" pointer out of a node + */ +static char* +regnext(struct match_globals *g, char *p) +{ + register int offset; + + if (p == &g->regdummy) + return(NULL); + + offset = NEXT(p); + if (offset == 0) + return(NULL); + + if (OP(p) == BACK) + return(p-offset); + else + return(p+offset); +} + +#ifdef DEBUG + +STATIC char *regprop(); + +/* + - regdump - dump a regexp onto stdout in vaguely comprehensible form + */ +void +regdump(regexp *r) +{ + register char *s; + register char op = EXACTLY; /* Arbitrary non-END op. */ + register char *next; + /* extern char *strchr(); */ + + + s = r->program + 1; + while (op != END) { /* While that wasn't END last time... */ + op = OP(s); + printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ + next = regnext(s); + if (next == NULL) /* Next ptr. */ + printf("(0)"); + else + printf("(%d)", (s-r->program)+(next-s)); + s += 3; + if (op == ANYOF || op == ANYBUT || op == EXACTLY) { + /* Literal string, where present. */ + while (*s != '\0') { + putchar(*s); + s++; + } + s++; + } + putchar('\n'); + } + + /* Header fields of interest. 
*/ + if (r->regstart != '\0') + printf("start `%c' ", r->regstart); + if (r->reganch) + printf("anchored "); + if (r->regmust != NULL) + printf("must have \"%s\"", r->regmust); + printf("\n"); +} + +/* + - regprop - printable representation of opcode + */ +static char * +regprop(char *op) +{ +#define BUFLEN 50 + register char *p; + static char buf[BUFLEN]; + + strcpy(buf, ":"); + + switch (OP(op)) { + case BOL: + p = "BOL"; + break; + case EOL: + p = "EOL"; + break; + case ANY: + p = "ANY"; + break; + case ANYOF: + p = "ANYOF"; + break; + case ANYBUT: + p = "ANYBUT"; + break; + case BRANCH: + p = "BRANCH"; + break; + case EXACTLY: + p = "EXACTLY"; + break; + case NOTHING: + p = "NOTHING"; + break; + case BACK: + p = "BACK"; + break; + case END: + p = "END"; + break; + case OPEN+1: + case OPEN+2: + case OPEN+3: + case OPEN+4: + case OPEN+5: + case OPEN+6: + case OPEN+7: + case OPEN+8: + case OPEN+9: + snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "OPEN%d", OP(op)-OPEN); + p = NULL; + break; + case CLOSE+1: + case CLOSE+2: + case CLOSE+3: + case CLOSE+4: + case CLOSE+5: + case CLOSE+6: + case CLOSE+7: + case CLOSE+8: + case CLOSE+9: + snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "CLOSE%d", OP(op)-CLOSE); + p = NULL; + break; + case STAR: + p = "STAR"; + break; + case PLUS: + p = "PLUS"; + break; + default: + printk("<3>Regexp: corrupted opcode\n"); + break; + } + if (p != NULL) + strncat(buf, p, BUFLEN-strlen(buf)); + return(buf); +} +#endif + + diff --git a/net/netfilter/regexp/regexp.h b/net/netfilter/regexp/regexp.h new file mode 100644 index 0000000..a72eba7 --- /dev/null +++ b/net/netfilter/regexp/regexp.h @@ -0,0 +1,41 @@ +/* + * Definitions etc. for regexp(3) routines. + * + * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof], + * not the System V one. 
+ */ + +#ifndef REGEXP_H +#define REGEXP_H + + +/* +http://www.opensource.apple.com/darwinsource/10.3/expect-1/expect/expect.h , +which contains a version of this library, says: + + * + * NSUBEXP must be at least 10, and no greater than 117 or the parser + * will not work properly. + * + +However, it looks rather like this library is limited to 10. If you think +otherwise, let us know. +*/ + +#define NSUBEXP 10 +typedef struct regexp { + char *startp[NSUBEXP]; + char *endp[NSUBEXP]; + char regstart; /* Internal use only. */ + char reganch; /* Internal use only. */ + char *regmust; /* Internal use only. */ + int regmlen; /* Internal use only. */ + char program[1]; /* Unwarranted chumminess with compiler. */ +} regexp; + +regexp * regcomp(char *exp, int *patternsize); +int regexec(regexp *prog, char *string); +void regsub(regexp *prog, char *source, char *dest); +void regerror(char *s); + +#endif diff --git a/net/netfilter/regexp/regmagic.h b/net/netfilter/regexp/regmagic.h new file mode 100644 index 0000000..5acf447 --- /dev/null +++ b/net/netfilter/regexp/regmagic.h @@ -0,0 +1,5 @@ +/* + * The first byte of the regexp internal "program" is actually this magic + * number; the start node begins in the second byte. + */ +#define MAGIC 0234 diff --git a/net/netfilter/regexp/regsub.c b/net/netfilter/regexp/regsub.c new file mode 100644 index 0000000..339631f --- /dev/null +++ b/net/netfilter/regexp/regsub.c @@ -0,0 +1,95 @@ +/* + * regsub + * @(#)regsub.c 1.3 of 2 April 86 + * + * Copyright (c) 1986 by University of Toronto. + * Written by Henry Spencer. Not derived from licensed software. + * + * Permission is granted to anyone to use this software for any + * purpose on any computer system, and to redistribute it freely, + * subject to the following restrictions: + * + * 1. The author is not responsible for the consequences of use of + * this software, no matter how awful, even if they arise + * from defects in it. + * + * 2. 
The origin of this software must not be misrepresented, either + * by explicit claim or by omission. + * + * 3. Altered versions must be plainly marked as such, and must not + * be misrepresented as being the original software. + * + * + * This code was modified by Ethan Sommer to work within the kernel + * (it now uses kmalloc etc..) + * + */ +#include "regexp.h" +#include "regmagic.h" +#include + + +#ifndef CHARBITS +#define UCHARAT(p) ((int)*(unsigned char *)(p)) +#else +#define UCHARAT(p) ((int)*(p)&CHARBITS) +#endif + +#if 0 +//void regerror(char * s) +//{ +// printk("regexp(3): %s", s); +// /* NOTREACHED */ +//} +#endif + +/* + - regsub - perform substitutions after a regexp match + */ +void +regsub(regexp * prog, char * source, char * dest) +{ + register char *src; + register char *dst; + register char c; + register int no; + register int len; + + /* Not necessary and gcc doesn't like it -MLS */ + /*extern char *strncpy();*/ + + if (prog == NULL || source == NULL || dest == NULL) { + regerror("NULL parm to regsub"); + return; + } + if (UCHARAT(prog->program) != MAGIC) { + regerror("damaged regexp fed to regsub"); + return; + } + + src = source; + dst = dest; + while ((c = *src++) != '\0') { + if (c == '&') + no = 0; + else if (c == '\\' && '0' <= *src && *src <= '9') + no = *src++ - '0'; + else + no = -1; + + if (no < 0) { /* Ordinary character. */ + if (c == '\\' && (*src == '\\' || *src == '&')) + c = *src++; + *dst++ = c; + } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) { + len = prog->endp[no] - prog->startp[no]; + (void) strncpy(dst, prog->startp[no], len); + dst += len; + if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. 
*/ + regerror("damaged match string"); + return; + } + } + } + *dst++ = '\0'; +} diff --git a/net/netfilter/xt_layer7.c b/net/netfilter/xt_layer7.c new file mode 100644 index 0000000..837fe2fd --- /dev/null +++ b/net/netfilter/xt_layer7.c @@ -0,0 +1,700 @@ +/* + Kernel module to match application layer (OSI layer 7) data in connections. + + http://l7-filter.sf.net + + (C) 2003-2009 Matthew Strait and Ethan Sommer. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version + 2 of the License, or (at your option) any later version. + http://www.gnu.org/licenses/gpl.txt + + Based on ipt_string.c (C) 2000 Emmanuel Roger , + xt_helper.c (C) 2002 Harald Welte and cls_layer7.c (C) 2003 Matthew Strait, + Ethan Sommer, Justin Levandoski. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27) +#include +#include +#endif +#include +#include +#include +#include + +#include "regexp/regexp.c" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Matthew Strait , Ethan Sommer "); +MODULE_DESCRIPTION("iptables application layer match module"); +MODULE_ALIAS("ipt_layer7"); +MODULE_VERSION("2.21"); + +static int maxdatalen = 2048; // this is the default +module_param(maxdatalen, int, 0444); +MODULE_PARM_DESC(maxdatalen, "maximum bytes of data looked at by l7-filter"); +#ifdef CONFIG_NETFILTER_XT_MATCH_LAYER7_DEBUG + #define DPRINTK(format,args...) printk(format,##args) +#else + #define DPRINTK(format,args...) +#endif + +/* Number of packets whose data we look at. 
+This can be modified through /proc/net/layer7_numpackets */ +static int num_packets = 10; + +static struct pattern_cache { + char * regex_string; + regexp * pattern; + struct pattern_cache * next; +} * first_pattern_cache = NULL; + +DEFINE_SPINLOCK(l7_lock); + +static int total_acct_packets(struct nf_conn *ct) +{ +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 26) + BUG_ON(ct == NULL); + return (ct->counters[IP_CT_DIR_ORIGINAL].packets + ct->counters[IP_CT_DIR_REPLY].packets); +#else + struct nf_conn_counter *acct; + + BUG_ON(ct == NULL); + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; + return (acct[IP_CT_DIR_ORIGINAL].packets + acct[IP_CT_DIR_REPLY].packets); +#endif +} + +#ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG +/* Converts an unfriendly string into a friendly one by +replacing unprintables with periods and all whitespace with " ". */ +static char * friendly_print(unsigned char * s) +{ + char * f = kmalloc(strlen(s) + 1, GFP_ATOMIC); + int i; + + if(!f) { + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in " + "friendly_print, bailing.\n"); + return NULL; + } + + for(i = 0; i < strlen(s); i++){ + if(isprint(s[i]) && s[i] < 128) f[i] = s[i]; + else if(isspace(s[i])) f[i] = ' '; + else f[i] = '.'; + } + f[i] = '\0'; + return f; +} + +static char dec2hex(int i) +{ + switch (i) { + case 0 ... 9: + return (i + '0'); + break; + case 10 ... 15: + return (i - 10 + 'a'); + break; + default: + if (net_ratelimit()) + printk("layer7: Problem in dec2hex\n"); + return '\0'; + } +} + +static char * hex_print(unsigned char * s) +{ + char * g = kmalloc(strlen(s)*3 + 1, GFP_ATOMIC); + int i; + + if(!g) { + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in hex_print, " + "bailing.\n"); + return NULL; + } + + for(i = 0; i < strlen(s); i++) { + g[i*3 ] = dec2hex(s[i]/16); + g[i*3 + 1] = dec2hex(s[i]%16); + g[i*3 + 2] = ' '; + } + g[i*3] = '\0'; + + return g; +} +#endif // DEBUG + +/* Use instead of regcomp. 
As we expect to be seeing the same regexps over and +over again, it make sense to cache the results. */ +static regexp * compile_and_cache(const char * regex_string, + const char * protocol) +{ + struct pattern_cache * node = first_pattern_cache; + struct pattern_cache * last_pattern_cache = first_pattern_cache; + struct pattern_cache * tmp; + unsigned int len; + + while (node != NULL) { + if (!strcmp(node->regex_string, regex_string)) + return node->pattern; + + last_pattern_cache = node;/* points at the last non-NULL node */ + node = node->next; + } + + /* If we reach the end of the list, then we have not yet cached + the pattern for this regex. Let's do that now. + Be paranoid about running out of memory to avoid list corruption. */ + tmp = kmalloc(sizeof(struct pattern_cache), GFP_ATOMIC); + + if(!tmp) { + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in " + "compile_and_cache, bailing.\n"); + return NULL; + } + + tmp->regex_string = kmalloc(strlen(regex_string) + 1, GFP_ATOMIC); + tmp->pattern = kmalloc(sizeof(struct regexp), GFP_ATOMIC); + tmp->next = NULL; + + if(!tmp->regex_string || !tmp->pattern) { + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in " + "compile_and_cache, bailing.\n"); + kfree(tmp->regex_string); + kfree(tmp->pattern); + kfree(tmp); + return NULL; + } + + /* Ok. The new node is all ready now. */ + node = tmp; + + if(first_pattern_cache == NULL) /* list is empty */ + first_pattern_cache = node; /* make node the beginning */ + else + last_pattern_cache->next = node; /* attach node to the end */ + + /* copy the string and compile the regex */ + len = strlen(regex_string); + DPRINTK("About to compile this: \"%s\"\n", regex_string); + node->pattern = regcomp((char *)regex_string, &len); + if ( !node->pattern ) { + if (net_ratelimit()) + printk(KERN_ERR "layer7: Error compiling regexp " + "\"%s\" (%s)\n", + regex_string, protocol); + /* pattern is now cached as NULL, so we won't try again. 
*/ + } + + strcpy(node->regex_string, regex_string); + return node->pattern; +} + +static int can_handle(const struct sk_buff *skb) +{ + if(!ip_hdr(skb)) /* not IP */ + return 0; + if(ip_hdr(skb)->protocol != IPPROTO_TCP && + ip_hdr(skb)->protocol != IPPROTO_UDP && + ip_hdr(skb)->protocol != IPPROTO_ICMP) + return 0; + return 1; +} + +/* Returns offset the into the skb->data that the application data starts */ +static int app_data_offset(const struct sk_buff *skb) +{ + /* In case we are ported somewhere (ebtables?) where ip_hdr(skb) + isn't set, this can be gotten from 4*(skb->data[0] & 0x0f) as well. */ + int ip_hl = 4*ip_hdr(skb)->ihl; + + if( ip_hdr(skb)->protocol == IPPROTO_TCP ) { + /* 12 == offset into TCP header for the header length field. + Can't get this with skb->h.th->doff because the tcphdr + struct doesn't get set when routing (this is confirmed to be + true in Netfilter as well as QoS.) */ + int tcp_hl = 4*(skb->data[ip_hl + 12] >> 4); + + return ip_hl + tcp_hl; + } else if( ip_hdr(skb)->protocol == IPPROTO_UDP ) { + return ip_hl + 8; /* UDP header is always 8 bytes */ + } else if( ip_hdr(skb)->protocol == IPPROTO_ICMP ) { + return ip_hl + 8; /* ICMP header is 8 bytes */ + } else { + if (net_ratelimit()) + printk(KERN_ERR "layer7: tried to handle unknown " + "protocol!\n"); + return ip_hl + 8; /* something reasonable */ + } +} + +/* handles whether there's a match when we aren't appending data anymore */ +static int match_no_append(struct nf_conn * conntrack, + struct nf_conn * master_conntrack, + enum ip_conntrack_info ctinfo, + enum ip_conntrack_info master_ctinfo, + const struct xt_layer7_info * info) +{ + /* If we're in here, throw the app data away */ + if(master_conntrack->layer7.app_data != NULL) { + + #ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG + if(!master_conntrack->layer7.app_proto) { + char * f = + friendly_print(master_conntrack->layer7.app_data); + char * g = + hex_print(master_conntrack->layer7.app_data); + DPRINTK("\nl7-filter gave up 
after %d bytes " + "(%d packets):\n%s\n", + strlen(f), total_acct_packets(master_conntrack), f); + kfree(f); + DPRINTK("In hex: %s\n", g); + kfree(g); + } + #endif + + kfree(master_conntrack->layer7.app_data); + master_conntrack->layer7.app_data = NULL; /* don't free again */ + } + + if(master_conntrack->layer7.app_proto){ + /* Here child connections set their .app_proto (for /proc) */ + if(!conntrack->layer7.app_proto) { + conntrack->layer7.app_proto = + kmalloc(strlen(master_conntrack->layer7.app_proto)+1, + GFP_ATOMIC); + if(!conntrack->layer7.app_proto){ + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory " + "in match_no_append, " + "bailing.\n"); + return 1; + } + strcpy(conntrack->layer7.app_proto, + master_conntrack->layer7.app_proto); + } + + return (!strcmp(master_conntrack->layer7.app_proto, + info->protocol)); + } + else { + /* If not classified, set to "unknown" to distinguish from + connections that are still being tested. */ + master_conntrack->layer7.app_proto = + kmalloc(strlen("unknown")+1, GFP_ATOMIC); + if(!master_conntrack->layer7.app_proto){ + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in " + "match_no_append, bailing.\n"); + return 1; + } + strcpy(master_conntrack->layer7.app_proto, "unknown"); + return 0; + } +} + +/* add the new app data to the conntrack. Return number of bytes added. */ +static int add_datastr(char *target, int offset, char *app_data, int len) +{ + int length = 0, i; + if (!target) return 0; + + /* Strip nulls. Make everything lower case (our regex lib doesn't + do case insensitivity). Add it to the end of the current data. */ + for(i = 0; i < maxdatalen-offset-1 && i < len; i++) { + if(app_data[i] != '\0') { + /* the kernel version of tolower mungs 'upper ascii' */ + target[length+offset] = + isascii(app_data[i])? + tolower(app_data[i]) : app_data[i]; + length++; + } + } + target[length+offset] = '\0'; + + return length; +} + +/* add the new app data to the conntrack. 
Return number of bytes added. */ +static int add_data(struct nf_conn * master_conntrack, + char * app_data, int appdatalen) +{ + int length; + + length = add_datastr(master_conntrack->layer7.app_data, master_conntrack->layer7.app_data_len, app_data, appdatalen); + master_conntrack->layer7.app_data_len += length; + + return length; +} + +/* taken from drivers/video/modedb.c */ +static int my_atoi(const char *s) +{ + int val = 0; + + for (;; s++) { + switch (*s) { + case '0'...'9': + val = 10*val+(*s-'0'); + break; + default: + return val; + } + } +} + +/* write out num_packets to userland. */ +static int layer7_read_proc(char* page, char ** start, off_t off, int count, + int* eof, void * data) +{ + if(num_packets > 99 && net_ratelimit()) + printk(KERN_ERR "layer7: NOT REACHED. num_packets too big\n"); + + page[0] = num_packets/10 + '0'; + page[1] = num_packets%10 + '0'; + page[2] = '\n'; + page[3] = '\0'; + + *eof=1; + + return 3; +} + +/* Read in num_packets from userland. Copies count bytes from the user + buffer, parses the leading decimal digits with my_atoi and clamps the + result to 1..99. Returns count, or -EFAULT if the user copy fails. */ +static int layer7_write_proc(struct file* file, const char* buffer, + unsigned long count, void *data) +{ + /* one extra byte so the copied data can be NUL-terminated below */ + char * foo = kmalloc(count + 1, GFP_ATOMIC); + + if(!foo){ + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory, bailing. " + "num_packets unchanged.\n"); + return count; + } + + if(copy_from_user(foo, buffer, count)) { + kfree(foo); /* don't leak the buffer when the copy faults */ + return -EFAULT; + } + + /* my_atoi only stops at a non-digit, and user data carries no + terminator of its own, so add one to keep it inside the buffer */ + foo[count] = '\0'; + + num_packets = my_atoi(foo); + kfree (foo); + + /* This has an arbitrary limit to make the math easier. I'm lazy. + But anyway, 99 is a LOT! If you want more, you're doing it wrong! 
*/ + if(num_packets > 99) { + printk(KERN_WARNING "layer7: num_packets can't be > 99.\n"); + num_packets = 99; + } else if(num_packets < 1) { + printk(KERN_WARNING "layer7: num_packets can't be < 1.\n"); + num_packets = 1; + } + + return count; +} + +static bool +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35) +match(const struct sk_buff *skbin, struct xt_action_param *par) +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) +match(const struct sk_buff *skbin, const struct xt_match_param *par) +#else +match(const struct sk_buff *skbin, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + bool *hotdrop) +#endif +{ + /* sidestep const without getting a compiler warning... */ + struct sk_buff * skb = (struct sk_buff *)skbin; + + const struct xt_layer7_info * info = + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) + par->matchinfo; + #else + matchinfo; + #endif + + enum ip_conntrack_info master_ctinfo, ctinfo; + struct nf_conn *master_conntrack, *conntrack; + unsigned char *app_data, *tmp_data; + unsigned int pattern_result, appdatalen; + regexp * comppattern; + + /* Be paranoid/incompetent - lock the entire match function. */ + spin_lock_bh(&l7_lock); + + if(!can_handle(skb)){ + DPRINTK("layer7: This is some protocol I can't handle.\n"); + spin_unlock_bh(&l7_lock); + return info->invert; + } + + /* Treat parent & all its children together as one connection, except + for the purpose of setting conntrack->layer7.app_proto in the actual + connection. This makes /proc/net/ip_conntrack more satisfying. */ + if(!(conntrack = nf_ct_get(skb, &ctinfo)) || + !(master_conntrack=nf_ct_get(skb,&master_ctinfo))){ + DPRINTK("layer7: couldn't get conntrack.\n"); + spin_unlock_bh(&l7_lock); + return info->invert; + } + + /* Try to get a master conntrack (and its master etc) for FTP, etc. 
*/ + while (master_ct(master_conntrack) != NULL) + master_conntrack = master_ct(master_conntrack); + + /* if we've classified it or seen too many packets */ + if(!info->pkt && (total_acct_packets(master_conntrack) > num_packets || + master_conntrack->layer7.app_proto)) { + + pattern_result = match_no_append(conntrack, master_conntrack, + ctinfo, master_ctinfo, info); + + /* skb->cb[0] == seen. Don't do things twice if there are + multiple l7 rules. I'm not sure that using cb for this purpose + is correct, even though it says "put your private variables + there". But it doesn't look like it is being used for anything + else in the skbs that make it here. */ + skb->cb[0] = 1; /* marking it seen here's probably irrelevant */ + + spin_unlock_bh(&l7_lock); + return (pattern_result ^ info->invert); + } + + if(skb_is_nonlinear(skb)){ + if(skb_linearize(skb) != 0){ + if (net_ratelimit()) + printk(KERN_ERR "layer7: failed to linearize " + "packet, bailing.\n"); + spin_unlock_bh(&l7_lock); + return info->invert; + } + } + + /* now that the skb is linearized, it's safe to set these. */ + app_data = skb->data + app_data_offset(skb); + appdatalen = skb_tail_pointer(skb) - app_data; + + /* the return value gets checked later, when we're ready to use it */ + comppattern = compile_and_cache(info->pattern, info->protocol); + + if (info->pkt) { + tmp_data = kmalloc(maxdatalen, GFP_ATOMIC); + if(!tmp_data){ + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in match, bailing.\n"); + /* l7_lock was taken at function entry; every exit path + must release it or the next packet spins forever */ + spin_unlock_bh(&l7_lock); + return info->invert; + } + + tmp_data[0] = '\0'; + add_datastr(tmp_data, 0, app_data, appdatalen); + pattern_result = ((comppattern && regexec(comppattern, tmp_data)) ? 
1 : 0); + + kfree(tmp_data); + tmp_data = NULL; + spin_unlock_bh(&l7_lock); + + return (pattern_result ^ info->invert); + } + + /* On the first packet of a connection, allocate space for app data */ + if(total_acct_packets(master_conntrack) == 1 && !skb->cb[0] && + !master_conntrack->layer7.app_data){ + master_conntrack->layer7.app_data = + kmalloc(maxdatalen, GFP_ATOMIC); + if(!master_conntrack->layer7.app_data){ + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in " + "match, bailing.\n"); + spin_unlock_bh(&l7_lock); + return info->invert; + } + + master_conntrack->layer7.app_data[0] = '\0'; + } + + /* Can be here, but unallocated, if numpackets is increased near + the beginning of a connection */ + if(master_conntrack->layer7.app_data == NULL){ + spin_unlock_bh(&l7_lock); + return info->invert; /* unmatched */ + } + + if(!skb->cb[0]){ + int newbytes; + newbytes = add_data(master_conntrack, app_data, appdatalen); + + if(newbytes == 0) { /* didn't add any data */ + skb->cb[0] = 1; + /* Didn't match before, not going to match now */ + spin_unlock_bh(&l7_lock); + return info->invert; + } + } + + /* If looking for "unknown", then never match. "Unknown" means that + we've given up; we're still trying with these packets. */ + if(!strcmp(info->protocol, "unknown")) { + pattern_result = 0; + /* If looking for "unset", then always match. "Unset" means that we + haven't yet classified the connection. 
*/ + } else if(!strcmp(info->protocol, "unset")) { + pattern_result = 2; + DPRINTK("layer7: matched unset: not yet classified " + "(%d/%d packets)\n", + total_acct_packets(master_conntrack), num_packets); + /* If the regexp failed to compile, don't bother running it */ + } else if(comppattern && + regexec(comppattern, master_conntrack->layer7.app_data)){ + DPRINTK("layer7: matched %s\n", info->protocol); + pattern_result = 1; + } else pattern_result = 0; + + if(pattern_result == 1) { + master_conntrack->layer7.app_proto = + kmalloc(strlen(info->protocol)+1, GFP_ATOMIC); + if(!master_conntrack->layer7.app_proto){ + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in " + "match, bailing.\n"); + spin_unlock_bh(&l7_lock); + return (pattern_result ^ info->invert); + } + strcpy(master_conntrack->layer7.app_proto, info->protocol); + } else if(pattern_result > 1) { /* cleanup from "unset" */ + pattern_result = 1; + } + + /* mark the packet seen */ + skb->cb[0] = 1; + + spin_unlock_bh(&l7_lock); + return (pattern_result ^ info->invert); +} + +// load nf_conntrack_ipv4 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35) +static int +#else +static bool +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) +check(const struct xt_mtchk_param *par) +{ + if (nf_ct_l3proto_try_module_get(par->match->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", par->match->family); +#else +check(const char *tablename, const void *inf, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) +{ + if (nf_ct_l3proto_try_module_get(match->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", match->family); +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35) + return -EINVAL; + } + return 0; +#else + return 0; + } + return 1; +#endif +} + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) + static void destroy(const struct xt_mtdtor_param *par) + { + 
nf_ct_l3proto_module_put(par->match->family); + } +#else + static void destroy(const struct xt_match *match, void *matchinfo) + { + nf_ct_l3proto_module_put(match->family); + } +#endif + +static struct xt_match xt_layer7_match[] __read_mostly = { +{ + .name = "layer7", + .family = AF_INET, + .checkentry = check, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_layer7_info), + .me = THIS_MODULE +} +}; + +static void layer7_cleanup_proc(void) +{ + remove_proc_entry("layer7_numpackets", init_net.proc_net); +} + +/* register the proc file; if it cannot be created, num_packets simply + keeps its current value and the module still loads */ +static void layer7_init_proc(void) +{ + struct proc_dir_entry* entry; + entry = create_proc_entry("layer7_numpackets", 0644, init_net.proc_net); + if (!entry) { + /* creation failed: don't dereference NULL */ + printk(KERN_WARNING "layer7: could not create " + "layer7_numpackets proc entry\n"); + return; + } + entry->read_proc = layer7_read_proc; + entry->write_proc = layer7_write_proc; +} + +static int __init xt_layer7_init(void) +{ + need_conntrack(); + + layer7_init_proc(); + if(maxdatalen < 1) { + printk(KERN_WARNING "layer7: maxdatalen can't be < 1, " + "using 1\n"); + maxdatalen = 1; + } + /* This is not a hard limit. It's just here to prevent people from + bringing their slow machines to a grinding halt. 
*/ + else if(maxdatalen > 65536) { + printk(KERN_WARNING "layer7: maxdatalen can't be > 65536, " + "using 65536\n"); + maxdatalen = 65536; + } + return xt_register_matches(xt_layer7_match, + ARRAY_SIZE(xt_layer7_match)); +} + +static void __exit xt_layer7_fini(void) +{ + layer7_cleanup_proc(); + xt_unregister_matches(xt_layer7_match, ARRAY_SIZE(xt_layer7_match)); +} + +module_init(xt_layer7_init); +module_exit(xt_layer7_fini); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fafb968..f9b8fd6 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -210,6 +210,7 @@ struct packet_sock { unsigned int tp_loss:1; unsigned int tp_tstamp; struct packet_type prot_hook ____cacheline_aligned_in_smp; + unsigned int pkt_type; }; struct packet_skb_cb { @@ -354,6 +355,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, { struct sock *sk; struct sockaddr_pkt *spkt; + struct packet_sock *po; /* * When we registered the protocol we saved the socket in the data @@ -361,6 +363,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, */ sk = pt->af_packet_priv; + po = pkt_sk(sk); /* * Yank back the headers [hope the device set this @@ -373,7 +376,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, * so that this procedure is noop. 
*/ - if (skb->pkt_type == PACKET_LOOPBACK) + if (!(po->pkt_type & (1 << skb->pkt_type))) goto out; if (!net_eq(dev_net(dev), sock_net(sk))) @@ -566,12 +569,12 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, int skb_len = skb->len; unsigned int snaplen, res; - if (skb->pkt_type == PACKET_LOOPBACK) - goto drop; - sk = pt->af_packet_priv; po = pkt_sk(sk); + if (!(po->pkt_type & (1 << skb->pkt_type))) + goto drop; + if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; @@ -690,12 +693,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct timespec ts; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); - if (skb->pkt_type == PACKET_LOOPBACK) - goto drop; - sk = pt->af_packet_priv; po = pkt_sk(sk); + if (!(po->pkt_type & (1 << skb->pkt_type))) + goto drop; + if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; @@ -1532,6 +1535,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, spin_lock_init(&po->bind_lock); mutex_init(&po->pg_vec_lock); po->prot_hook.func = packet_rcv; + po->pkt_type = PACKET_MASK_ANY & ~(1 << PACKET_LOOPBACK); if (sock->type == SOCK_PACKET) po->prot_hook.func = packet_rcv_spkt; @@ -2105,6 +2109,16 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->tp_tstamp = val; return 0; } + case PACKET_RECV_TYPE: + { + unsigned int val; + if (optlen != sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + po->pkt_type = val & ~(1 << PACKET_LOOPBACK); /* pkt_type is a bitmask of (1 << type); loopback is never deliverable */ + return 0; + } default: return -ENOPROTOOPT; } @@ -2162,6 +2176,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, data = &val; break; + case PACKET_RECV_TYPE: + if (len > sizeof(unsigned int)) + len = sizeof(unsigned int); + val = po->pkt_type; + + data = &val; + break; case PACKET_VERSION: if (len > sizeof(int)) len = sizeof(int); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2590e91..61a944a 100644 --- 
a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -148,6 +148,37 @@ config NET_SCH_SFQ To compile this code as a module, choose M here: the module will be called sch_sfq. +config NET_SCH_ESFQ + tristate "Enhanced Stochastic Fairness Queueing (ESFQ)" + ---help--- + Say Y here if you want to use the Enhanced Stochastic Fairness + Queueing (ESFQ) packet scheduling algorithm for some of your network + devices or as a leaf discipline for a classful qdisc such as HTB or + CBQ (see the top of <file:net/sched/sch_esfq.c> for details and + references to the SFQ algorithm). + + This is an enhanced SFQ version which allows you to control some + hardcoded values in the SFQ scheduler. + + ESFQ also adds control of the hash function used to identify packet + flows. The original SFQ discipline hashes by connection; ESFQ adds + several other hashing methods, such as by src IP or by dst IP, which + can be more fair to users in some networking situations. + + To compile this code as a module, choose M here: the + module will be called sch_esfq. + +config NET_SCH_ESFQ_NFCT + bool "Connection Tracking Hash Types" + depends on NET_SCH_ESFQ && NF_CONNTRACK + ---help--- + Say Y here to enable support for hashing based on netfilter connection + tracking information. This is useful for a router that is also using + NAT to connect privately-addressed hosts to the Internet. If you want + to provide fair distribution of upstream bandwidth, ESFQ must use + connection tracking information, since all outgoing packets will share + the same source address. + config NET_SCH_TEQL tristate "True Link Equalizer (TEQL)" ---help--- @@ -571,6 +602,19 @@ config NET_ACT_CSUM To compile this code as a module, choose M here: the module will be called act_csum. +config NET_ACT_CONNMARK + tristate "Connection Tracking Marking" + depends on NET_CLS_ACT + depends on NF_CONNTRACK + depends on NF_CONNTRACK_MARK + ---help--- + Say Y here to restore the connmark from a scheduler action. + + If unsure, say N. 
+ + To compile this code as a module, choose M here: the + module will be called act_connmark. + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index dc5889c..4261c96 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o +obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o @@ -26,6 +27,7 @@ obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o +obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c new file mode 100644 index 0000000..742a0f0 --- /dev/null +++ b/net/sched/act_connmark.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2011 Felix Fietkau + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ */
+/* NOTE(review): the header name is missing after every #include in this file; restore them before building. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+/* Value stored in act_connmark_ops.type below. */
+#define TCA_ACT_CONNMARK 20
+/* The instance hash table has CONNMARK_TAB_MASK + 1 buckets. */
+#define CONNMARK_TAB_MASK 3
+static struct tcf_common *tcf_connmark_ht[CONNMARK_TAB_MASK + 1];
+static u32 connmark_idx_gen;
+static DEFINE_RWLOCK(connmark_lock);
+
+static struct tcf_hashinfo connmark_hash_info = {
+	.htab	= tcf_connmark_ht,
+	.hmask	= CONNMARK_TAB_MASK,
+	.lock	= &connmark_lock,
+};
+/*
+ * tcf_connmark - copy the conntrack mark of a packet into skb->mark.
+ *
+ * Only IPv4 and IPv6 packets that are at least as long as their fixed
+ * header are handled.  The packet is run through nf_conntrack_in(); when
+ * that returns NF_ACCEPT and a conntrack entry is attached, the entry's
+ * mark is written to skb->mark and the skb's conntrack reference is
+ * dropped again.  Every path returns TC_ACT_PIPE; @a and @res are unused.
+ */
+static int tcf_connmark(struct sk_buff *skb, struct tc_action *a,
+		       struct tcf_result *res)
+{
+	struct nf_conn *c;
+	enum ip_conntrack_info ctinfo;
+	int proto;
+	int r;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (skb->len < sizeof(struct iphdr))
+			goto out;
+		proto = PF_INET;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (skb->len < sizeof(struct ipv6hdr))
+			goto out;
+		proto = PF_INET6;
+	} else
+		goto out;
+
+	r = nf_conntrack_in(dev_net(skb->dev), proto, NF_INET_PRE_ROUTING, skb);
+	if (r != NF_ACCEPT)
+		goto out;
+
+	c = nf_ct_get(skb, &ctinfo);
+	if (!c)
+		goto out;
+
+	skb->mark = c->mark;
+	nf_conntrack_put(skb->nfct);	/* release the reference on skb->nfct ... */
+	skb->nfct = NULL;		/* ... and detach it from the skb */
+
+out:
+	return TC_ACT_PIPE;
+}
+/*
+ * tcf_connmark_init - create a connmark action instance.
+ *
+ * Always asks tcf_hash_create() for a new entry (index 0) and inserts it
+ * into connmark_hash_info.  @nla and @ovr are not looked at.
+ * Returns ACT_P_CREATED, or the error encoded in the pointer returned by
+ * tcf_hash_create().
+ */
+static int tcf_connmark_init(struct nlattr *nla, struct nlattr *est,
+			     struct tc_action *a, int ovr, int bind)
+{
+	struct tcf_common *pc;
+
+	pc = tcf_hash_create(0, est, a, sizeof(*pc), bind,
+			     &connmark_idx_gen, &connmark_hash_info);
+	if (IS_ERR(pc))
+	    return PTR_ERR(pc);
+
+	tcf_hash_insert(pc, &connmark_hash_info);
+
+	return ACT_P_CREATED;
+}
+/* Release the instance behind @a, if any; returns 0 when a->priv is unset. */
+static inline int tcf_connmark_cleanup(struct tc_action *a, int bind)
+{
+	if (a->priv)
+		return tcf_hash_release(a->priv, bind, &connmark_hash_info);
+	return 0;
+}
+/* Dump callback: writes no attributes and simply returns skb->len. */
+static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
+				int bind, int ref)
+{
+	return skb->len;
+}
+/* Operations table registered with tcf_register_action() at module load. */
+static struct tc_action_ops act_connmark_ops = {
+	.kind		=	"connmark",
+	.hinfo		=	&connmark_hash_info,
+	.type		=	TCA_ACT_CONNMARK,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_connmark,
+	.dump		=	tcf_connmark_dump,
+	.cleanup	=	tcf_connmark_cleanup,
+	.init		=	tcf_connmark_init,
+	.walk		=	tcf_generic_walker,
+};
+
+MODULE_AUTHOR("Felix Fietkau ");
+MODULE_DESCRIPTION("Connection tracking mark restoring");
+MODULE_LICENSE("GPL");
+/* Module entry point: register the action. */
+static int __init connmark_init_module(void)
+{
+	return tcf_register_action(&act_connmark_ops);
+}
+/* Module exit point: unregister the action. */
+static void __exit connmark_cleanup_module(void)
+{
+	tcf_unregister_action(&act_connmark_ops);
+}
+
+module_init(connmark_init_module);
+module_exit(connmark_cleanup_module);
diff --git a/net/sched/sch_esfq.c b/net/sched/sch_esfq.c
new file mode 100644
index 0000000..6618a74
--- /dev/null
+++ b/net/sched/sch_esfq.c
@@ -0,0 +1,702 @@
+/*
+ * net/sched/sch_esfq.c	Extended Stochastic Fairness Queueing discipline.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov,
+ *
+ * Changes:	Alexander Atanasov,
+ *		Added dynamic depth,limit,divisor,hash_kind options.
+ *		Added dst and src hashes.
+ *
+ *		Alexander Clouter,
+ *		Ported ESFQ to Linux 2.6.
+ *
+ *		Corey Hickey,
+ *		Maintenance of the Linux 2.6 port.
+ *		Added fwmark hash (thanks to Robert Kurjata).
+ *		Added usage of jhash.
+ *		Added conntrack support.
+ *		Added ctnatchg hash (thanks to Ben Pfountz).
+ */
+/* NOTE(review): the header name is missing after every #include below; restore them before building. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#ifdef CONFIG_NET_SCH_ESFQ_NFCT
+#include
+#endif
+
+/*	Stochastic Fairness Queuing algorithm.
+	For more comments look at sch_sfq.c.
+	The difference is that you can change limit, depth,
+	hash table size and choose alternate hash types.
+
+	classic:	same as in sch_sfq.c
+	dst:		destination IP address
+	src:		source IP address
+	fwmark:		netfilter mark value
+	ctorigdst:	original destination IP address
+	ctorigsrc:	original source IP address
+	ctrepldst:	reply destination IP address
+	ctreplsrc:	reply source IP
+	ctnatchg:	ctorigsrc when ctorigdst equals ctreplsrc,
+			otherwise ctreplsrc
+
+*/
+/* Queue end selector taken by esfq_q_enqueue(): head or tail of the slot queue. */
+#define ESFQ_HEAD 0
+#define ESFQ_TAIL 1
+
+/* This type should contain at least depth*2 values: the dep[] array is
+ * indexed both by slot number and by (depth + queue length). */
+typedef unsigned int esfq_index;
+/* Doubly linked list node; next/prev are indices into dep[], not pointers. */
+struct esfq_head
+{
+	esfq_index	next;
+	esfq_index	prev;
+};
+/*
+ * Per-qdisc state.  The value "depth" is used as the "none" marker:
+ * ht[] entries equal to depth are empty buckets and tail == depth means
+ * there is no active slot.
+ */
+struct esfq_sched_data
+{
+/* Parameters */
+	int		perturb_period;	/* timer interval in jiffies; 0 disables the timer */
+	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
+	int		limit;		/* packet limit checked in esfq_enqueue() */
+	unsigned	depth;		/* number of slots (flows) */
+	unsigned	hash_divisor;	/* number of hash buckets; used as mask (divisor-1) */
+	unsigned	hash_kind;	/* one of the TCA_SFQ_HASH_* values */
+/* Variables */
+	struct timer_list perturb_timer;
+	int		perturbation;	/* seed mixed into every jhash call */
+	esfq_index	tail;		/* Index of current slot in round */
+	esfq_index	max_depth;	/* Maximal depth */
+
+	esfq_index	*ht;		/* Hash table: bucket -> slot index */
+	esfq_index	*next;		/* Active slots link */
+	short		*allot;		/* Current allotment per slot */
+	unsigned short	*hash;		/* Hash value indexed by slots */
+	struct sk_buff_head	*qs;	/* Slot queue */
+	struct esfq_head	*dep;	/* Linked list of slots, indexed by depth */
+};
+
+/* This contains the info we will hash.
*/
+struct esfq_packet_info
+{
+	u32	proto;		/* protocol or port */
+	u32	src;		/* source from packet header */
+	u32	dst;		/* destination from packet header */
+	u32	ctorigsrc;	/* original source from conntrack */
+	u32	ctorigdst;	/* original destination from conntrack */
+	u32	ctreplsrc;	/* reply source from conntrack */
+	u32	ctrepldst;	/* reply destination from conntrack */
+	u32	mark;		/* netfilter mark (fwmark) */
+};
+/* jhash of one word, seeded with the perturbation and masked to a bucket index. */
+static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a)
+{
+	return jhash_1word(a, q->perturbation) & (q->hash_divisor-1);
+}
+/* Same as esfq_jhash_1word() for two words. */
+static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b)
+{
+	return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1);
+}
+/* Same as esfq_jhash_1word() for three words. */
+static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c)
+{
+	return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1);
+}
+/*
+ * esfq_hash - map a packet to a hash bucket according to q->hash_kind.
+ *
+ * Collects addresses, protocol/port word, fwmark and (when built with
+ * CONFIG_NET_SCH_ESFQ_NFCT) conntrack tuple addresses into an
+ * esfq_packet_info, then hashes the field(s) selected by q->hash_kind.
+ * An unknown hash_kind logs a rate-limited warning and falls back to the
+ * classic (dst, src, proto) hash.
+ *
+ * NOTE(review): the IPv4/IPv6 branches read the word following the network
+ * header without any length check visible in this function - confirm that
+ * callers guarantee it is present in the linear data.
+ */
+static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb)
+{
+	struct esfq_packet_info info;
+#ifdef CONFIG_NET_SCH_ESFQ_NFCT
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+#endif
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+	{
+		struct iphdr *iph = ip_hdr(skb);
+		info.dst = iph->daddr;
+		info.src = iph->saddr;
+		/* unfragmented packets of these protocols: use the first
+		 * 32-bit word after the IP header instead of the protocol */
+		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+		    (iph->protocol == IPPROTO_TCP ||
+		     iph->protocol == IPPROTO_UDP ||
+		     iph->protocol == IPPROTO_SCTP ||
+		     iph->protocol == IPPROTO_DCCP ||
+		     iph->protocol == IPPROTO_ESP))
+			info.proto = *(((u32*)iph) + iph->ihl);
+		else
+			info.proto = iph->protocol;
+		break;
+	}
+	case __constant_htons(ETH_P_IPV6):
+	{
+		struct ipv6hdr *iph = ipv6_hdr(skb);
+		/* Hash ipv6 addresses into a u32. This isn't ideal,
+		 * but the code is simple. */
+		info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation);
+		info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation);
+		if (iph->nexthdr == IPPROTO_TCP ||
+		    iph->nexthdr == IPPROTO_UDP ||
+		    iph->nexthdr == IPPROTO_SCTP ||
+		    iph->nexthdr == IPPROTO_DCCP ||
+		    iph->nexthdr == IPPROTO_ESP)
+			info.proto = *(u32*)&iph[1];
+		else
+			info.proto = iph->nexthdr;
+		break;
+	}
+	default:
+		/* other protocols: hash on the dst entry and socket pointers */
+		info.dst = (u32)(unsigned long)skb_dst(skb);
+		info.src = (u32)(unsigned long)skb->sk;
+		info.proto = skb->protocol;
+	}
+
+	info.mark = skb->mark;
+
+#ifdef CONFIG_NET_SCH_ESFQ_NFCT
+	/* defaults if there is no conntrack info */
+	info.ctorigsrc = info.src;
+	info.ctorigdst = info.dst;
+	info.ctreplsrc = info.dst;
+	info.ctrepldst = info.src;
+	/* collect conntrack info */
+	if (ct && ct != &nf_conntrack_untracked) {
+		if (skb->protocol == __constant_htons(ETH_P_IP)) {
+			info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+			info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip;
+			info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip;
+			info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip;
+		}
+		else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+			/* Again, hash ipv6 addresses into a single u32. */
+			info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation);
+			info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation);
+			info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation);
+			info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation);
+		}
+
+	}
+#endif
+
+	switch(q->hash_kind) {
+	case TCA_SFQ_HASH_CLASSIC:
+		return esfq_jhash_3words(q, info.dst, info.src, info.proto);
+	case TCA_SFQ_HASH_DST:
+		return esfq_jhash_1word(q, info.dst);
+	case TCA_SFQ_HASH_SRC:
+		return esfq_jhash_1word(q, info.src);
+	case TCA_SFQ_HASH_FWMARK:
+		return esfq_jhash_1word(q, info.mark);
+#ifdef CONFIG_NET_SCH_ESFQ_NFCT
+	case TCA_SFQ_HASH_CTORIGDST:
+		return esfq_jhash_1word(q, info.ctorigdst);
+	case TCA_SFQ_HASH_CTORIGSRC:
+		return esfq_jhash_1word(q, info.ctorigsrc);
+	case TCA_SFQ_HASH_CTREPLDST:
+		return esfq_jhash_1word(q, info.ctrepldst);
+	case TCA_SFQ_HASH_CTREPLSRC:
+		return esfq_jhash_1word(q, info.ctreplsrc);
+	case TCA_SFQ_HASH_CTNATCHG:
+	{
+		if (info.ctorigdst == info.ctreplsrc)
+			return esfq_jhash_1word(q, info.ctorigsrc);
+		return esfq_jhash_1word(q, info.ctreplsrc);
+	}
+#endif
+	default:
+		if (net_ratelimit())
+			printk(KERN_WARNING "ESFQ: Unknown hash method. Falling back to classic.\n");
+	}
+	return esfq_jhash_3words(q, info.dst, info.src, info.proto);
+}
+/*
+ * Insert slot x at the head of the list for its current queue length;
+ * that list's head node is dep[qlen + depth].
+ */
+static inline void esfq_link(struct esfq_sched_data *q, esfq_index x)
+{
+	esfq_index p, n;
+	int d = q->qs[x].qlen + q->depth;
+
+	p = d;
+	n = q->dep[d].next;
+	q->dep[x].next = n;
+	q->dep[x].prev = p;
+	q->dep[p].next = q->dep[n].prev = x;
+}
+/*
+ * Re-file slot x after a packet was removed from it: unlink it from its
+ * old length list, lower max_depth when that list is now empty and was
+ * the deepest one, then link it into the list for the new length.
+ */
+static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x)
+{
+	esfq_index p, n;
+
+	n = q->dep[x].next;
+	p = q->dep[x].prev;
+	q->dep[p].next = n;
+	q->dep[n].prev = p;
+
+	if (n == p && q->max_depth == q->qs[x].qlen + 1)
+		q->max_depth--;
+
+	esfq_link(q, x);
+}
+/*
+ * Re-file slot x after a packet was added to it: unlink it, raise
+ * max_depth if the slot is now the deepest, and link it into the list for
+ * the new length.
+ */
+static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x)
+{
+	esfq_index p, n;
+	int d;
+
+	n = q->dep[x].next;
+	p = q->dep[x].prev;
+	q->dep[p].next = n;
+	q->dep[n].prev = p;
+	d = q->qs[x].qlen;
+	if (q->max_depth < d)
+		q->max_depth = d;
+
+	esfq_link(q, x);
+}
+/*
+ * esfq_drop - drop one packet and return its length, or 0 if the qdisc
+ * holds nothing to drop (max_depth == 0).  Updates qlen, drops and backlog.
+ */
+static unsigned int esfq_drop(struct Qdisc *sch)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	esfq_index d = q->max_depth;
+	struct sk_buff *skb;
+	unsigned int len;
+
+	/* Queue is full! Find the longest slot and
+	   drop a packet from it */
+
+	if (d > 1) {
+		esfq_index x = q->dep[d+q->depth].next;
+		skb = q->qs[x].prev;	/* last packet of that slot */
+		len = skb->len;
+		__skb_unlink(skb, &q->qs[x]);
+		kfree_skb(skb);
+		esfq_dec(q, x);
+		sch->q.qlen--;
+		sch->qstats.drops++;
+		sch->qstats.backlog -= len;
+		return len;
+	}
+
+	if (d == 1) {
+		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
+		d = q->next[q->tail];	/* d is reused as a slot index from here on */
+		q->next[q->tail] = q->next[d];
+		q->allot[q->next[d]] += q->quantum;
+		skb = q->qs[d].prev;
+		len = skb->len;
+		__skb_unlink(skb, &q->qs[d]);
+		kfree_skb(skb);
+		esfq_dec(q, d);
+		sch->q.qlen--;
+		q->ht[q->hash[d]] = q->depth;	/* slot is empty: mark its bucket unused */
+		sch->qstats.drops++;
+		sch->qstats.backlog -= len;
+		return len;
+	}
+
+	return 0;
+}
+/*
+ * esfq_q_enqueue - put skb into the slot for its flow, at the head or tail
+ * of the slot queue as selected by @end.  A bucket without a slot takes the
+ * first entry of the zero-length list (dep[depth].next).  A slot that just
+ * became non-empty is added to the active ring behind q->tail.
+ * Does no limit checking and touches no Qdisc statistics.
+ */
+static void esfq_q_enqueue(struct sk_buff *skb, struct esfq_sched_data *q, unsigned int end)
+{
+	unsigned hash = esfq_hash(q, skb);
+	unsigned depth = q->depth;
+	esfq_index x;
+
+	x = q->ht[hash];
+	if (x == depth) {
+		q->ht[hash] = x = q->dep[depth].next;
+		q->hash[x] = hash;
+	}
+
+	if (end == ESFQ_TAIL)
+		__skb_queue_tail(&q->qs[x], skb);
+	else
+		__skb_queue_head(&q->qs[x], skb);
+
+	esfq_inc(q, x);
+	if (q->qs[x].qlen == 1) {		/* The flow is new */
+		if (q->tail == depth) {	/* It is the first flow */
+			q->tail = x;
+			q->next[x] = x;
+			q->allot[x] = q->quantum;
+		} else {
+			q->next[x] = q->next[q->tail];
+			q->next[q->tail] = x;
+			q->tail = x;
+		}
+	}
+}
+/*
+ * Qdisc enqueue hook.  The packet is always queued first; if that brings
+ * the queue length to limit-1 or more, one packet is dropped through
+ * esfq_drop() and NET_XMIT_CN is returned, otherwise 0.
+ */
+static int esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	esfq_q_enqueue(skb, q, ESFQ_TAIL);
+	sch->qstats.backlog += skb->len;
+	if (++sch->q.qlen < q->limit-1) {
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+		return 0;
+	}
+
+	sch->qstats.drops++;
+	esfq_drop(sch);
+	return NET_XMIT_CN;
+}
+/* Return, without removing it, the packet the next dequeue would hand out, or NULL. */
+static struct sk_buff *esfq_peek(struct Qdisc* sch)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	esfq_index a;
+
+	/* No active slots */
+	if (q->tail == q->depth)
+		return NULL;
+
+	a = q->next[q->tail];
+	return skb_peek(&q->qs[a]);
+}
+/*
+ * esfq_q_dequeue - take the next packet in round-robin order, or NULL when
+ * no slot is active.  An emptied slot leaves the ring and frees its hash
+ * bucket; a slot whose allotment is used up becomes the new tail and the
+ * following slot gets a fresh quantum.  Touches no Qdisc statistics.
+ */
+static struct sk_buff *esfq_q_dequeue(struct esfq_sched_data *q)
+{
+	struct sk_buff *skb;
+	unsigned depth = q->depth;
+	esfq_index a, old_a;
+
+	/* No active slots */
+	if (q->tail == depth)
+		return NULL;
+
+	a = old_a = q->next[q->tail];
+
+	/* Grab packet */
+	skb = __skb_dequeue(&q->qs[a]);
+	esfq_dec(q, a);
+
+	/* Is the slot empty? */
+	if (q->qs[a].qlen == 0) {
+		q->ht[q->hash[a]] = depth;
+		a = q->next[a];
+		if (a == old_a) {	/* it was the only active slot */
+			q->tail = depth;
+			return skb;
+		}
+		q->next[q->tail] = a;
+		q->allot[a] += q->quantum;
+	} else if ((q->allot[a] -= skb->len) <= 0) {
+		q->tail = a;
+		a = q->next[a];
+		q->allot[a] += q->quantum;
+	}
+
+	return skb;
+}
+/* Qdisc dequeue hook: esfq_q_dequeue() plus qlen/backlog accounting. */
+static struct sk_buff *esfq_dequeue(struct Qdisc* sch)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	skb = esfq_q_dequeue(q);
+	if (skb == NULL)
+		return NULL;
+	sch->q.qlen--;
+	sch->qstats.backlog -= skb->len;
+	return skb;
+}
+/*
+ * Stop the perturbation timer and free all per-queue arrays.
+ * NOTE(review): the freed pointers are left in place, not reset to NULL -
+ * callers must not call this twice on the same state.
+ */
+static void esfq_q_destroy(struct esfq_sched_data *q)
+{
+	del_timer(&q->perturb_timer);
+	if(q->ht)
+		kfree(q->ht);
+	if(q->dep)
+		kfree(q->dep);
+	if(q->next)
+		kfree(q->next);
+	if(q->allot)
+		kfree(q->allot);
+	if(q->hash)
+		kfree(q->hash);
+	if(q->qs)
+		kfree(q->qs);
+}
+/* Qdisc destroy hook. */
+static void esfq_destroy(struct Qdisc *sch)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	esfq_q_destroy(q);
+}
+
+/* Qdisc reset hook: dequeue and free every queued packet. */
+static void esfq_reset(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+
+	while ((skb = esfq_dequeue(sch)) != NULL)
+		kfree_skb(skb);
+}
+/*
+ * Timer callback (arg is the Qdisc): pick a new perturbation value in the
+ * range 0..31 and re-arm the timer while perturb_period is non-zero.
+ */
+static void esfq_perturbation(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc*)arg;
+	struct esfq_sched_data *q = qdisc_priv(sch);
+
+	q->perturbation = net_random()&0x1F;
+
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	}
+}
+/*
+ * Validate a requested hash kind.  Returns @kind when it is supported and
+ * TCA_SFQ_HASH_CLASSIC (after a rate-limited warning) otherwise.  Without
+ * CONFIG_NET_SCH_ESFQ_NFCT the conntrack kinds count as unsupported; with
+ * it, their case labels fall through to "return kind".
+ */
+static unsigned int esfq_check_hash(unsigned int kind)
+{
+	switch (kind) {
+	case TCA_SFQ_HASH_CTORIGDST:
+	case TCA_SFQ_HASH_CTORIGSRC:
+	case TCA_SFQ_HASH_CTREPLDST:
+	case TCA_SFQ_HASH_CTREPLSRC:
+	case TCA_SFQ_HASH_CTNATCHG:
+#ifndef CONFIG_NET_SCH_ESFQ_NFCT
+		{
+			if (net_ratelimit())
+				printk(KERN_WARNING "ESFQ: Conntrack hash types disabled in kernel config.
Falling back to classic.\n");
+			return TCA_SFQ_HASH_CLASSIC;
+		}
+#endif
+	case TCA_SFQ_HASH_CLASSIC:
+	case TCA_SFQ_HASH_DST:
+	case TCA_SFQ_HASH_SRC:
+	case TCA_SFQ_HASH_FWMARK:
+		return kind;
+	default:
+		{
+			if (net_ratelimit())
+				printk(KERN_WARNING "ESFQ: Unknown hash type. Falling back to classic.\n");
+			return TCA_SFQ_HASH_CLASSIC;
+		}
+	}
+}
+
+/*
+ * esfq_q_init - set the parameters of @q from @opt and allocate its tables.
+ *
+ * @opt may be NULL, in which case the defaults are used (128 flows, limit
+ * 128, 1024 buckets, no perturbation, classic hash).  q->quantum must have
+ * been preset by the caller; it is only overwritten when the option
+ * carries a non-zero quantum.
+ *
+ * Returns 0 on success, -EINVAL for a truncated attribute or an oversized
+ * flow count, and -ENOBUFS when an allocation fails (everything allocated
+ * so far is released through esfq_q_destroy()).
+ */
+static int esfq_q_init(struct esfq_sched_data *q, struct nlattr *opt)
+{
+	struct tc_esfq_qopt *ctl = NULL;
+	esfq_index p = ~0U/2;
+	unsigned int i;
+
+	/* only look at the payload once the attribute is known to be present
+	 * and large enough */
+	if (opt) {
+		if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
+			return -EINVAL;
+		ctl = nla_data(opt);
+	}
+
+	q->perturbation = 0;
+	q->hash_kind = TCA_SFQ_HASH_CLASSIC;
+	q->max_depth = 0;
+	if (ctl == NULL) {
+		q->perturb_period = 0;
+		q->hash_divisor = 1024;
+		q->tail = q->limit = q->depth = 128;
+	} else {
+		if (ctl->quantum)
+			q->quantum = ctl->quantum;
+		q->perturb_period = ctl->perturb_period*HZ;
+		q->hash_divisor = ctl->divisor ? : 1024;
+		q->tail = q->limit = q->depth = ctl->flows ? : 128;
+
+		if (q->depth > p - 1)
+			return -EINVAL;
+
+		if (ctl->limit)
+			q->limit = min_t(u32, ctl->limit, q->depth);
+
+		if (ctl->hash_kind)
+			q->hash_kind = esfq_check_hash(ctl->hash_kind);
+	}
+
+	/*
+	 * divisor and flows come from user space: kcalloc() rejects element
+	 * counts whose byte size would overflow, where an open-coded
+	 * kmalloc(n * size) would silently allocate a short buffer.
+	 */
+	q->ht = kcalloc(q->hash_divisor, sizeof(esfq_index), GFP_KERNEL);
+	if (!q->ht)
+		goto err_case;
+	q->dep = kcalloc(1 + q->depth*2, sizeof(struct esfq_head), GFP_KERNEL);
+	if (!q->dep)
+		goto err_case;
+	q->next = kcalloc(q->depth, sizeof(esfq_index), GFP_KERNEL);
+	if (!q->next)
+		goto err_case;
+	q->allot = kcalloc(q->depth, sizeof(short), GFP_KERNEL);
+	if (!q->allot)
+		goto err_case;
+	q->hash = kcalloc(q->depth, sizeof(unsigned short), GFP_KERNEL);
+	if (!q->hash)
+		goto err_case;
+	q->qs = kcalloc(q->depth, sizeof(struct sk_buff_head), GFP_KERNEL);
+	if (!q->qs)
+		goto err_case;
+
+	/* every bucket starts empty (value depth == "no slot") */
+	for (i = 0; i < q->hash_divisor; i++)
+		q->ht[i] = q->depth;
+	/* every per-length list head starts out pointing at itself */
+	for (i = 0; i < q->depth; i++) {
+		skb_queue_head_init(&q->qs[i]);
+		q->dep[i+q->depth].next = i+q->depth;
+		q->dep[i+q->depth].prev = i+q->depth;
+	}
+
+	/* all slots are empty, so they all go onto the length-0 list */
+	for (i = 0; i < q->depth; i++)
+		esfq_link(q, i);
+	return 0;
+err_case:
+	esfq_q_destroy(q);
+	return -ENOBUFS;
+}
+
+/*
+ * Qdisc init hook: default the quantum to the device MTU, build the queue
+ * state and start the perturbation timer when a period was requested.
+ */
+static int esfq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	int err;
+
+	/* set the timer up first: the error path of esfq_q_init() calls
+	 * del_timer() on it */
+	init_timer(&q->perturb_timer);
+	q->perturb_timer.data = (unsigned long)sch;
+	q->perturb_timer.function = esfq_perturbation;
+
+	q->quantum = psched_mtu(qdisc_dev(sch)); /* default */
+	if ((err = esfq_q_init(q, opt)))
+		return err;
+
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	}
+
+	return 0;
+}
+
+/*
+ * Qdisc change hook: build a queue with the new parameters, move the
+ * queued packets over and swap it in.
+ *
+ * The new queue may have fewer flows or a lower limit than the old one.
+ * Packets are therefore carried over only while the same admission test as
+ * in esfq_enqueue() holds (resulting length < limit - 1); the rest are
+ * dropped and accounted.  Without this, esfq_q_enqueue() would run out of
+ * free slots and index qs[] with the "no slot" value depth.
+ *
+ * Returns 0 on success or the error from esfq_q_init(), in which case the
+ * existing queue is left untouched.
+ */
+static int esfq_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	struct esfq_sched_data new;
+	struct sk_buff *skb;
+	unsigned int dropped = 0;
+	int kept = 0;
+	int err;
+
+	/* set up new queue */
+	memset(&new, 0, sizeof(struct esfq_sched_data));
+	new.quantum = psched_mtu(qdisc_dev(sch)); /* default */
+	if ((err = esfq_q_init(&new, opt)))
+		return err;
+
+	/* copy all packets from the old queue to the new queue */
+	sch_tree_lock(sch);
+	while ((skb = esfq_q_dequeue(q)) != NULL) {
+		if (kept + 1 < new.limit - 1) {
+			esfq_q_enqueue(skb, &new, ESFQ_TAIL);
+			kept++;
+			continue;
+		}
+		/* no room in the new queue: drop and account */
+		sch->q.qlen--;
+		sch->qstats.backlog -= skb->len;
+		sch->qstats.drops++;
+		kfree_skb(skb);
+		dropped++;
+	}
+	/* let parent qdiscs know about the packets that went away */
+	if (dropped)
+		qdisc_tree_decrease_qlen(sch, dropped);
+
+	/* clean up the old queue */
+	esfq_q_destroy(q);
+
+	/* copy elements of the new queue into the old queue */
+	q->perturb_period = new.perturb_period;
+	q->quantum        = new.quantum;
+	q->limit          = new.limit;
+	q->depth          = new.depth;
+	q->hash_divisor   = new.hash_divisor;
+	q->hash_kind      = new.hash_kind;
+	q->tail           = new.tail;
+	q->max_depth      = new.max_depth;
+	q->ht    = new.ht;
+	q->dep   = new.dep;
+	q->next  = new.next;
+	q->allot = new.allot;
+	q->hash  = new.hash;
+	q->qs    = new.qs;
+
+	/* finish up */
+	if (q->perturb_period) {
+		/* mod_timer() also copes with a timer that the callback
+		 * re-armed after the del_timer() in esfq_q_destroy();
+		 * add_timer() must not be used on a pending timer */
+		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
+	} else {
+		q->perturbation = 0;
+	}
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+/*
+ * Qdisc dump hook: report the current parameters as a tc_esfq_qopt in
+ * TCA_OPTIONS.  Returns skb->len, or -1 after trimming the message when
+ * the attribute does not fit.
+ */
+static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tc_esfq_qopt opt;
+
+	opt.quantum = q->quantum;
+	opt.perturb_period = q->perturb_period/HZ;	/* jiffies back to seconds */
+
+	opt.limit = q->limit;
+	opt.divisor = q->hash_divisor;
+	opt.flows = q->depth;
+	opt.hash_kind = q->hash_kind;
+
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+/* Operations table registered with register_qdisc() at module load. */
+static struct Qdisc_ops esfq_qdisc_ops =
+{
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"esfq",
+	.priv_size	=	sizeof(struct esfq_sched_data),
+	.enqueue	=	esfq_enqueue,
+	.dequeue	=	esfq_dequeue,
+	.peek		=	esfq_peek,
+	.drop		=	esfq_drop,
+	.init		=	esfq_init,
+	.reset		=	esfq_reset,
+	.destroy	=	esfq_destroy,
+	.change		=	esfq_change,
+	.dump		=	esfq_dump,
+	.owner		=	THIS_MODULE,
+};
+
+/* Module entry point: register the qdisc. */
+static int __init esfq_module_init(void)
+{
+	return register_qdisc(&esfq_qdisc_ops);
+}
+/* Module exit point: unregister the qdisc. */
+static void __exit esfq_module_exit(void)
+{
+	unregister_qdisc(&esfq_qdisc_ops);
+}
+module_init(esfq_module_init)
+module_exit(esfq_module_exit)
+MODULE_LICENSE("GPL");
-- 1.7.4.1