diff options
Diffstat (limited to 'drivers/net/ethernet')
517 files changed, 17154 insertions, 6523 deletions
diff --git a/drivers/net/ethernet/8390/8390.h b/drivers/net/ethernet/8390/8390.h index e52264465998..f784a6e2ab0e 100644 --- a/drivers/net/ethernet/8390/8390.h +++ b/drivers/net/ethernet/8390/8390.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-1.0+ */ + /* Generic NS8390 register definitions. */ /* This file is part of Donald Becker's 8390 drivers, and is distributed diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c index 991ad953aa79..a09f383dd249 100644 --- a/drivers/net/ethernet/8390/apne.c +++ b/drivers/net/ethernet/8390/apne.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Amiga Linux/68k 8390 based PCMCIA Ethernet Driver for the Amiga 1200 * @@ -19,12 +20,6 @@ * * ---------------------------------------------------------------------------- * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. - * - * ---------------------------------------------------------------------------- - * */ diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 78f985885547..fea489af72fb 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: GPL-1.0+ + /*====================================================================== A PCMCIA ethernet driver for Asix AX88190-based cards @@ -17,9 +19,7 @@ Written 1992,1993 by Donald Becker. Copyright 1993 United States Government as represented by the - Director, National Security Agency. This software may be used and - distributed according to the terms of the GNU General Public License, - incorporated herein by reference. + Director, National Security Agency. Donald Becker may be reached at becker@scyld.com ======================================================================*/ diff --git a/drivers/net/ethernet/8390/hydra.c b/drivers/net/ethernet/8390/hydra.c index 1df7601af86a..24f49a8ff903 100644 --- a/drivers/net/ethernet/8390/hydra.c +++ b/drivers/net/ethernet/8390/hydra.c @@ -1,10 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only + /* New Hydra driver using generic 8390 core */ /* Based on old hydra driver by Topi Kanerva (topi@susanna.oulu.fi) */ -/* This file is subject to the terms and conditions of the GNU General */ -/* Public License. See the file COPYING in the main directory of the */ -/* Linux distribution for more details. */ - /* Peter De Schrijver (p2@mind.be) */ /* Oldenburg 2000 */ diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c index e84021282edf..84aeb8054304 100644 --- a/drivers/net/ethernet/8390/lib8390.c +++ b/drivers/net/ethernet/8390/lib8390.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: GPL-1.0+ + /* 8390.c: A general NS8390 ethernet driver core for linux. */ /* Written 1992-94 by Donald Becker. @@ -5,9 +7,6 @@ Copyright 1993 United States Government as represented by the Director, National Security Agency. - This software may be used and distributed according to the terms - of the GNU General Public License, incorporated herein by reference. - The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation 410 Severn Ave., Suite 210 diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c index 7fb819b9b89a..4a0a095a1a8a 100644 --- a/drivers/net/ethernet/8390/mac8390.c +++ b/drivers/net/ethernet/8390/mac8390.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-1.0+ /* mac8390.c: New driver for 8390-based Nubus (or Nubus-alike) Ethernet cards on Linux */ /* Based on the former daynaport.c driver, by Alan Cox. Some code taken from or inspired by skeleton.c by Donald Becker, acenic.c by - Jes Sorensen, and ne2k-pci.c by Donald Becker and Paul Gortmaker. - - This software may be used and distributed according to the terms of - the GNU Public License, incorporated herein by reference. */ + Jes Sorensen, and ne2k-pci.c by Donald Becker and Paul Gortmaker. */ /* 2000-02-28: support added for Dayna and Kinetics cards by A.G.deWijn@phys.uu.nl */ diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c index 8a7918d33419..217838b28220 100644 --- a/drivers/net/ethernet/8390/mcf8390.c +++ b/drivers/net/ethernet/8390/mcf8390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Support for ColdFire CPU based boards using a NS8390 Ethernet device. * @@ -5,9 +6,6 @@ * * (C) Copyright 2012, Greg Ungerer <gerg@uclinux.org> * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. */ #include <linux/module.h> diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c index bc9c81dc00fd..7d89ec1cf273 100644 --- a/drivers/net/ethernet/8390/ne.c +++ b/drivers/net/ethernet/8390/ne.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* ne.c: A general non-shared-memory NS8390 ethernet driver for linux. */ /* Written 1992-94 by Donald Becker. @@ -5,9 +6,6 @@ Copyright 1993 United States Government as represented by the Director, National Security Agency. - This software may be used and distributed according to the terms - of the GNU General Public License, incorporated herein by reference. - The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403 diff --git a/drivers/net/ethernet/8390/ne2k-pci.c b/drivers/net/ethernet/8390/ne2k-pci.c index 6a0a2039600a..2c6bd36d2f31 100644 --- a/drivers/net/ethernet/8390/ne2k-pci.c +++ b/drivers/net/ethernet/8390/ne2k-pci.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* A Linux device driver for PCI NE2000 clones. * * Authors and other copyright holders: diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index 0f07fe03da98..9bd5e991f1e5 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /*====================================================================== A PCMCIA ethernet driver for NS8390-based cards @@ -17,9 +18,7 @@ Written 1992,1993 by Donald Becker. Copyright 1993 United States Government as represented by the - Director, National Security Agency. This software may be used and - distributed according to the terms of the GNU General Public License, - incorporated herein by reference. + Director, National Security Agency. Donald Becker may be reached at becker@scyld.com Based also on Keith Moore's changes to Don Becker's code, for IBM diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c index 7465650c8078..22ca804b2e95 100644 --- a/drivers/net/ethernet/8390/smc-ultra.c +++ b/drivers/net/ethernet/8390/smc-ultra.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* smc-ultra.c: A SMC Ultra ethernet driver for linux. */ /* This is a driver for the SMC Ultra and SMC EtherEZ ISA ethercards. @@ -7,9 +8,6 @@ Copyright 1993 United States Government as represented by the Director, National Security Agency. - This software may be used and distributed according to the terms - of the GNU General Public License, incorporated herein by reference. - The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation 410 Severn Ave., Suite 210 diff --git a/drivers/net/ethernet/8390/stnic.c b/drivers/net/ethernet/8390/stnic.c index bd89ca8a92df..265976e3b64a 100644 --- a/drivers/net/ethernet/8390/stnic.c +++ b/drivers/net/ethernet/8390/stnic.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* stnic.c : A SH7750 specific part of driver for NS DP83902A ST-NIC. * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * * Copyright (C) 1999 kaz Kojima */ diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c index 119021d41451..ffd639477dfc 100644 --- a/drivers/net/ethernet/8390/wd.c +++ b/drivers/net/ethernet/8390/wd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* wd.c: A WD80x3 ethernet driver for linux. */ /* Written 1993-94 by Donald Becker. @@ -5,9 +6,6 @@ Copyright 1993 United States Government as represented by the Director, National Security Agency. - This software may be used and distributed according to the terms - of the GNU General Public License, incorporated herein by reference. - The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation 410 Severn Ave., Suite 210 diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c index e8b4fe813a08..d70390e9d03d 100644 --- a/drivers/net/ethernet/8390/zorro8390.c +++ b/drivers/net/ethernet/8390/zorro8390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Amiga Linux/m68k and Linux/PPC Zorro NS8390 Ethernet Driver * @@ -9,12 +10,6 @@ * * --------------------------------------------------------------------------- * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. - * - * --------------------------------------------------------------------------- - * * The Ariadne II and X-Surf are Zorro-II boards containing Realtek RTL8019AS * Ethernet Controllers. */ diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c index f5c2d7a9abc1..ca66b747b7c5 100644 --- a/drivers/net/ethernet/adi/adin1110.c +++ b/drivers/net/ethernet/adi/adin1110.c @@ -739,7 +739,7 @@ static int adin1110_broadcasts_filter(struct adin1110_port_priv *port_priv, u32 port_rules = 0; u8 mask[ETH_ALEN]; - memset(mask, 0xFF, ETH_ALEN); + eth_broadcast_addr(mask); if (accept_broadcast && port_priv->state == BR_STATE_FORWARDING) port_rules = adin1110_port_rules(port_priv, true, true); @@ -760,7 +760,7 @@ static int adin1110_set_mac_address(struct net_device *netdev, return -EADDRNOTAVAIL; eth_hw_addr_set(netdev, dev_addr); - memset(mask, 0xFF, ETH_ALEN); + eth_broadcast_addr(mask); mac_slot = (!port_priv->nr) ? ADIN_MAC_P1_ADDR_SLOT : ADIN_MAC_P2_ADDR_SLOT; port_rules = adin1110_port_rules(port_priv, true, false); @@ -1271,7 +1271,7 @@ static int adin1110_port_set_blocking_state(struct adin1110_port_priv *port_priv goto out; /* Allow only BPDUs to be passed to the CPU */ - memset(mask, 0xFF, ETH_ALEN); + eth_broadcast_addr(mask); port_rules = adin1110_port_rules(port_priv, true, false); ret = adin1110_write_mac_address(port_priv, mac_slot, mac, mask, port_rules); @@ -1385,8 +1385,8 @@ static int adin1110_fdb_add(struct adin1110_port_priv *port_priv, return -ENOMEM; other_port = priv->ports[!port_priv->nr]; - port_rules = adin1110_port_rules(port_priv, false, true); - memset(mask, 0xFF, ETH_ALEN); + port_rules = adin1110_port_rules(other_port, false, true); + eth_broadcast_addr(mask); return adin1110_write_mac_address(other_port, mac_nr, (u8 *)fdb->addr, mask, port_rules); diff --git a/drivers/net/ethernet/altera/Kconfig b/drivers/net/ethernet/altera/Kconfig index dd7fd41ccde5..17985319088c 100644 --- a/drivers/net/ethernet/altera/Kconfig +++ b/drivers/net/ethernet/altera/Kconfig @@ -4,7 +4,9 @@ config ALTERA_TSE depends on HAS_DMA select PHYLIB select PHYLINK - select PCS_ALTERA_TSE + select PCS_LYNX + select MDIO_REGMAP + select REGMAP_MMIO help This driver supports the Altera Triple-Speed (TSE) Ethernet MAC. diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 66e3af73ec41..2e15800e5310 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -27,14 +27,16 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/mii.h> +#include <linux/mdio/mdio-regmap.h> #include <linux/netdevice.h> #include <linux/of_device.h> #include <linux/of_mdio.h> #include <linux/of_net.h> #include <linux/of_platform.h> -#include <linux/pcs-altera-tse.h> +#include <linux/pcs-lynx.h> #include <linux/phy.h> #include <linux/platform_device.h> +#include <linux/regmap.h> #include <linux/skbuff.h> #include <asm/cacheflush.h> @@ -1036,10 +1038,6 @@ static struct net_device_ops altera_tse_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -static void alt_tse_mac_an_restart(struct phylink_config *config) -{ -} - static void alt_tse_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { @@ -1096,7 +1094,6 @@ static struct phylink_pcs *alt_tse_select_pcs(struct phylink_config *config, } static const struct phylink_mac_ops alt_tse_phylink_ops = { - .mac_an_restart = alt_tse_mac_an_restart, .mac_config = alt_tse_mac_config, .mac_link_down = alt_tse_mac_link_down, .mac_link_up = alt_tse_mac_link_up, @@ -1137,13 +1134,16 @@ static int request_and_map(struct platform_device *pdev, const char *name, static int altera_tse_probe(struct platform_device *pdev) { const struct of_device_id *of_id = NULL; + struct regmap_config pcs_regmap_cfg; struct altera_tse_private *priv; + struct mdio_regmap_config mrc; struct resource *control_port; + struct regmap *pcs_regmap; struct resource *dma_res; struct resource *pcs_res; + struct mii_bus *pcs_bus; struct net_device *ndev; void __iomem *descmap; - int pcs_reg_width = 2; int ret = -ENODEV; ndev = alloc_etherdev(sizeof(struct altera_tse_private)); @@ -1255,18 +1255,41 @@ static int altera_tse_probe(struct platform_device *pdev) if (ret) goto err_free_netdev; + memset(&pcs_regmap_cfg, 0, sizeof(pcs_regmap_cfg)); + memset(&mrc, 0, sizeof(mrc)); /* SGMII PCS address space. The location can vary depending on how the * IP is integrated. We can have a resource dedicated to it at a specific * address space, but if it's not the case, we fallback to the mdiophy0 * from the MAC's address space */ - ret = request_and_map(pdev, "pcs", &pcs_res, - &priv->pcs_base); + ret = request_and_map(pdev, "pcs", &pcs_res, &priv->pcs_base); if (ret) { + /* If we can't find a dedicated resource for the PCS, fallback + * to the internal PCS, that has a different address stride + */ priv->pcs_base = priv->mac_dev + tse_csroffs(mdio_phy0); - pcs_reg_width = 4; + pcs_regmap_cfg.reg_bits = 32; + /* Values are MDIO-like values, on 16 bits */ + pcs_regmap_cfg.val_bits = 16; + pcs_regmap_cfg.reg_shift = REGMAP_UPSHIFT(2); + } else { + pcs_regmap_cfg.reg_bits = 16; + pcs_regmap_cfg.val_bits = 16; + pcs_regmap_cfg.reg_shift = REGMAP_UPSHIFT(1); } + /* Create a regmap for the PCS so that it can be used by the PCS driver */ + pcs_regmap = devm_regmap_init_mmio(&pdev->dev, priv->pcs_base, + &pcs_regmap_cfg); + if (IS_ERR(pcs_regmap)) { + ret = PTR_ERR(pcs_regmap); + goto err_free_netdev; + } + mrc.regmap = pcs_regmap; + mrc.parent = &pdev->dev; + mrc.valid_addr = 0x0; + mrc.autoscan = false; + /* Rx IRQ */ priv->rx_irq = platform_get_irq_byname(pdev, "rx_irq"); if (priv->rx_irq == -ENXIO) { @@ -1389,7 +1412,18 @@ static int altera_tse_probe(struct platform_device *pdev) (unsigned long) control_port->start, priv->rx_irq, priv->tx_irq); - priv->pcs = alt_tse_pcs_create(ndev, priv->pcs_base, pcs_reg_width); + snprintf(mrc.name, MII_BUS_ID_SIZE, "%s-pcs-mii", ndev->name); + pcs_bus = devm_mdio_regmap_register(&pdev->dev, &mrc); + if (IS_ERR(pcs_bus)) { + ret = PTR_ERR(pcs_bus); + goto err_init_pcs; + } + + priv->pcs = lynx_pcs_create_mdiodev(pcs_bus, 0); + if (IS_ERR(priv->pcs)) { + ret = PTR_ERR(priv->pcs); + goto err_init_pcs; + } priv->phylink_config.dev = &ndev->dev; priv->phylink_config.type = PHYLINK_NETDEV; @@ -1412,12 +1446,13 @@ static int altera_tse_probe(struct platform_device *pdev) if (IS_ERR(priv->phylink)) { dev_err(&pdev->dev, "failed to create phylink\n"); ret = PTR_ERR(priv->phylink); - goto err_init_phy; + goto err_init_phylink; } return 0; - -err_init_phy: +err_init_phylink: + lynx_pcs_destroy(priv->pcs); +err_init_pcs: unregister_netdev(ndev); err_register_netdev: netif_napi_del(&priv->napi); @@ -1438,6 +1473,8 @@ static int altera_tse_remove(struct platform_device *pdev) altera_tse_mdio_destroy(ndev); unregister_netdev(ndev); phylink_destroy(priv->phylink); + lynx_pcs_destroy(priv->pcs); + free_netdev(ndev); return 0; diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h index 466ad9470d1f..6de0d590be34 100644 --- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -869,7 +869,9 @@ struct ena_admin_host_info { * 2 : interrupt_moderation * 3 : rx_buf_mirroring * 4 : rss_configurable_function_key - * 31:5 : reserved + * 5 : reserved + * 6 : rx_page_reuse + * 31:7 : reserved */ u32 driver_supported_features; }; @@ -1184,6 +1186,8 @@ struct ena_admin_ena_mmio_req_read_less_resp { #define ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK BIT(3) #define ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_SHIFT 4 #define ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK BIT(4) +#define ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_SHIFT 6 +#define ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_MASK BIT(6) /* aenq_common_desc */ #define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 451c3a1b6255..633b321d7fdd 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -35,6 +35,8 @@ #define ENA_REGS_ADMIN_INTR_MASK 1 +#define ENA_MAX_BACKOFF_DELAY_EXP 16U + #define ENA_MIN_ADMIN_POLL_US 100 #define ENA_MAX_ADMIN_POLL_US 5000 @@ -536,6 +538,7 @@ static int ena_com_comp_status_to_errno(struct ena_com_admin_queue *admin_queue, static void ena_delay_exponential_backoff_us(u32 exp, u32 delay_us) { + exp = min_t(u32, exp, ENA_MAX_BACKOFF_DELAY_EXP); delay_us = max_t(u32, ENA_MIN_ADMIN_POLL_US, delay_us); delay_us = min_t(u32, delay_us * (1U << exp), ENA_MAX_ADMIN_POLL_US); usleep_range(delay_us, 2 * delay_us); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index e6a6efaeb87c..d19593fae226 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1023,7 +1023,7 @@ static int ena_alloc_rx_buffer(struct ena_ring *rx_ring, int tailroom; /* restore page offset value in case it has been changed by device */ - rx_info->page_offset = headroom; + rx_info->buf_offset = headroom; /* if previous allocated page is not used */ if (unlikely(rx_info->page)) @@ -1040,6 +1040,8 @@ static int ena_alloc_rx_buffer(struct ena_ring *rx_ring, tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); rx_info->page = page; + rx_info->dma_addr = dma; + rx_info->page_offset = 0; ena_buf = &rx_info->ena_buf; ena_buf->paddr = dma + headroom; ena_buf->len = ENA_PAGE_SIZE - headroom - tailroom; @@ -1047,14 +1049,12 @@ static int ena_alloc_rx_buffer(struct ena_ring *rx_ring, return 0; } -static void ena_unmap_rx_buff(struct ena_ring *rx_ring, - struct ena_rx_buffer *rx_info) +static void ena_unmap_rx_buff_attrs(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info, + unsigned long attrs) { - struct ena_com_buf *ena_buf = &rx_info->ena_buf; - - dma_unmap_page(rx_ring->dev, ena_buf->paddr - rx_ring->rx_headroom, - ENA_PAGE_SIZE, - DMA_BIDIRECTIONAL); + dma_unmap_page_attrs(rx_ring->dev, rx_info->dma_addr, ENA_PAGE_SIZE, + DMA_BIDIRECTIONAL, attrs); } static void ena_free_rx_page(struct ena_ring *rx_ring, @@ -1068,7 +1068,7 @@ static void ena_free_rx_page(struct ena_ring *rx_ring, return; } - ena_unmap_rx_buff(rx_ring, rx_info); + ena_unmap_rx_buff_attrs(rx_ring, rx_info, 0); __free_page(page); rx_info->page = NULL; @@ -1406,14 +1406,14 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) return tx_pkts; } -static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag) +static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag, u16 len) { struct sk_buff *skb; if (!first_frag) - skb = napi_alloc_skb(rx_ring->napi, rx_ring->rx_copybreak); + skb = napi_alloc_skb(rx_ring->napi, len); else - skb = napi_build_skb(first_frag, ENA_PAGE_SIZE); + skb = napi_build_skb(first_frag, len); if (unlikely(!skb)) { ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1, @@ -1422,24 +1422,47 @@ static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag) netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, "Failed to allocate skb. first_frag %s\n", first_frag ? "provided" : "not provided"); - return NULL; } return skb; } +static bool ena_try_rx_buf_page_reuse(struct ena_rx_buffer *rx_info, u16 buf_len, + u16 len, int pkt_offset) +{ + struct ena_com_buf *ena_buf = &rx_info->ena_buf; + + /* More than ENA_MIN_RX_BUF_SIZE left in the reused buffer + * for data + headroom + tailroom. + */ + if (SKB_DATA_ALIGN(len + pkt_offset) + ENA_MIN_RX_BUF_SIZE <= ena_buf->len) { + page_ref_inc(rx_info->page); + rx_info->page_offset += buf_len; + ena_buf->paddr += buf_len; + ena_buf->len -= buf_len; + return true; + } + + return false; +} + static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, u32 descs, u16 *next_to_clean) { + int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + bool is_xdp_loaded = ena_xdp_present_ring(rx_ring); struct ena_rx_buffer *rx_info; struct ena_adapter *adapter; + int page_offset, pkt_offset; + dma_addr_t pre_reuse_paddr; u16 len, req_id, buf = 0; + bool reuse_rx_buf_page; struct sk_buff *skb; - void *page_addr; - u32 page_offset; - void *data_addr; + void *buf_addr; + int buf_offset; + u16 buf_len; len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; @@ -1459,34 +1482,30 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, "rx_info %p page %p\n", rx_info, rx_info->page); - /* save virt address of first buffer */ - page_addr = page_address(rx_info->page); + buf_offset = rx_info->buf_offset; + pkt_offset = buf_offset - rx_ring->rx_headroom; page_offset = rx_info->page_offset; - data_addr = page_addr + page_offset; - - prefetch(data_addr); + buf_addr = page_address(rx_info->page) + page_offset; if (len <= rx_ring->rx_copybreak) { - skb = ena_alloc_skb(rx_ring, NULL); + skb = ena_alloc_skb(rx_ring, NULL, len); if (unlikely(!skb)) return NULL; - netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, - "RX allocated small packet. len %d. data_len %d\n", - skb->len, skb->data_len); - /* sync this buffer for CPU use */ dma_sync_single_for_cpu(rx_ring->dev, - dma_unmap_addr(&rx_info->ena_buf, paddr), + dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, len, DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, data_addr, len); + skb_copy_to_linear_data(skb, buf_addr + buf_offset, len); dma_sync_single_for_device(rx_ring->dev, - dma_unmap_addr(&rx_info->ena_buf, paddr), + dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, len, DMA_FROM_DEVICE); skb_put(skb, len); + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "RX allocated small packet. len %d.\n", skb->len); skb->protocol = eth_type_trans(skb, rx_ring->netdev); rx_ring->free_ids[*next_to_clean] = req_id; *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs, @@ -1494,14 +1513,28 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, return skb; } - ena_unmap_rx_buff(rx_ring, rx_info); + buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom); + + pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr); + + /* If XDP isn't loaded try to reuse part of the RX buffer */ + reuse_rx_buf_page = !is_xdp_loaded && + ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset); - skb = ena_alloc_skb(rx_ring, page_addr); + dma_sync_single_for_cpu(rx_ring->dev, + pre_reuse_paddr + pkt_offset, + len, + DMA_FROM_DEVICE); + + if (!reuse_rx_buf_page) + ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC); + + skb = ena_alloc_skb(rx_ring, buf_addr, buf_len); if (unlikely(!skb)) return NULL; /* Populate skb's linear part */ - skb_reserve(skb, page_offset); + skb_reserve(skb, buf_offset); skb_put(skb, len); skb->protocol = eth_type_trans(skb, rx_ring->netdev); @@ -1510,7 +1543,8 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, "RX skb updated. len %d. data_len %d\n", skb->len, skb->data_len); - rx_info->page = NULL; + if (!reuse_rx_buf_page) + rx_info->page = NULL; rx_ring->free_ids[*next_to_clean] = req_id; *next_to_clean = @@ -1525,10 +1559,28 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, rx_info = &rx_ring->rx_buffer_info[req_id]; - ena_unmap_rx_buff(rx_ring, rx_info); + /* rx_info->buf_offset includes rx_ring->rx_headroom */ + buf_offset = rx_info->buf_offset; + pkt_offset = buf_offset - rx_ring->rx_headroom; + buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom); + page_offset = rx_info->page_offset; + + pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr); + + reuse_rx_buf_page = !is_xdp_loaded && + ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset); + + dma_sync_single_for_cpu(rx_ring->dev, + pre_reuse_paddr + pkt_offset, + len, + DMA_FROM_DEVICE); + + if (!reuse_rx_buf_page) + ena_unmap_rx_buff_attrs(rx_ring, rx_info, + DMA_ATTR_SKIP_CPU_SYNC); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, - rx_info->page_offset, len, ENA_PAGE_SIZE); + page_offset + buf_offset, len, buf_len); } while (1); @@ -1626,7 +1678,7 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; xdp_prepare_buff(xdp, page_address(rx_info->page), - rx_info->page_offset, + rx_info->buf_offset, rx_ring->ena_bufs[0].len, false); /* If for some reason we received a bigger packet than * we expect, then we simply drop it @@ -1638,7 +1690,7 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) /* The xdp program might expand the headers */ if (ret == ENA_XDP_PASS) { - rx_info->page_offset = xdp->data - xdp->data_hard_start; + rx_info->buf_offset = xdp->data - xdp->data_hard_start; rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; } @@ -1693,7 +1745,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, /* First descriptor might have an offset set by the device */ rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; - rx_info->page_offset += ena_rx_ctx.pkt_offset; + rx_info->buf_offset += ena_rx_ctx.pkt_offset; netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n", @@ -1723,8 +1775,9 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, * from RX side. */ if (xdp_verdict & ENA_XDP_FORWARDED) { - ena_unmap_rx_buff(rx_ring, - &rx_ring->rx_buffer_info[req_id]); + ena_unmap_rx_buff_attrs(rx_ring, + &rx_ring->rx_buffer_info[req_id], + 0); rx_ring->rx_buffer_info[req_id].page = NULL; } } @@ -3233,7 +3286,8 @@ static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pd ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK | ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK | - ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; + ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK | + ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_MASK; rc = ena_com_set_host_attributes(ena_dev); if (rc) { diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 5a0d4ee76172..248b715b4d68 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -51,6 +51,8 @@ #define ENA_DEFAULT_RING_SIZE (1024) #define ENA_MIN_RING_SIZE (256) +#define ENA_MIN_RX_BUF_SIZE (2048) + #define ENA_MIN_NUM_IO_QUEUES (1) #define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) @@ -175,7 +177,9 @@ struct ena_tx_buffer { struct ena_rx_buffer { struct sk_buff *skb; struct page *page; + dma_addr_t dma_addr; u32 page_offset; + u32 buf_offset; struct ena_com_buf ena_buf; } ____cacheline_aligned; diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 483a070d96fa..36f9b932b9e2 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -196,7 +196,7 @@ int pdsc_qcq_alloc(struct pdsc *pdsc, unsigned int type, unsigned int index, dma_addr_t q_base_pa; int err; - qcq->q.info = vzalloc(num_descs * sizeof(*qcq->q.info)); + qcq->q.info = vcalloc(num_descs, sizeof(*qcq->q.info)); if (!qcq->q.info) { err = -ENOMEM; goto err_out; @@ -219,7 +219,7 @@ int pdsc_qcq_alloc(struct pdsc *pdsc, unsigned int type, unsigned int index, if (err) goto err_out_free_q_info; - qcq->cq.info = vzalloc(num_descs * sizeof(*qcq->cq.info)); + qcq->cq.info = vcalloc(num_descs, sizeof(*qcq->cq.info)); if (!qcq->cq.info) { err = -ENOMEM; goto err_out_free_irq; @@ -464,7 +464,8 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) { int i; - pdsc_devcmd_reset(pdsc); + if (!pdsc->pdev->is_virtfn) + pdsc_devcmd_reset(pdsc); pdsc_qcq_free(pdsc, &pdsc->notifyqcq); pdsc_qcq_free(pdsc, &pdsc->adminqcq); @@ -524,7 +525,8 @@ static void pdsc_fw_down(struct pdsc *pdsc) } /* Notify clients of fw_down */ - devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc); + if (pdsc->fw_reporter) + devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc); pdsc_notify(PDS_EVENT_RESET, &reset_event); pdsc_stop(pdsc); @@ -554,8 +556,9 @@ static void pdsc_fw_up(struct pdsc *pdsc) /* Notify clients of fw_up */ pdsc->fw_recoveries++; - devlink_health_reporter_state_update(pdsc->fw_reporter, - DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); + if (pdsc->fw_reporter) + devlink_health_reporter_state_update(pdsc->fw_reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); pdsc_notify(PDS_EVENT_RESET, &reset_event); return; diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index debe5216fe29..f77cd9f5a2fd 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -121,7 +121,7 @@ static const char *pdsc_devcmd_str(int opcode) } } -static int pdsc_devcmd_wait(struct pdsc *pdsc, int max_seconds) +static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds) { struct device *dev = pdsc->dev; unsigned long start_time; @@ -131,9 +131,6 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, int max_seconds) int done = 0; int err = 0; int status; - int opcode; - - opcode = ioread8(&pdsc->cmd_regs->cmd.opcode); start_time = jiffies; max_wait = start_time + (max_seconds * HZ); @@ -180,10 +177,10 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd, memcpy_toio(&pdsc->cmd_regs->cmd, cmd, sizeof(*cmd)); pdsc_devcmd_dbell(pdsc); - err = pdsc_devcmd_wait(pdsc, max_seconds); + err = pdsc_devcmd_wait(pdsc, cmd->opcode, max_seconds); memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp)); - if (err == -ENXIO || err == -ETIMEDOUT) + if ((err == -ENXIO || err == -ETIMEDOUT) && pdsc->wq) queue_work(pdsc->wq, &pdsc->health_work); return err; diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index 9c6b3653c1c7..d9607033bbf2 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -10,6 +10,9 @@ pdsc_viftype *pdsc_dl_find_viftype_by_id(struct pdsc *pdsc, { int vt; + if (!pdsc->viftype_status) + return NULL; + for (vt = 0; vt < PDS_DEV_TYPE_MAX; vt++) { if (pdsc->viftype_status[vt].dl_id == dl_id) return &pdsc->viftype_status[vt]; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 16e7fb2c0dae..6a716337f48b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -2782,9 +2782,9 @@ static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_prv_data *pdata, switch (speed) { case SPEED_10: - /* Supported in ver >= 30H */ + /* Supported in ver 21H and ver >= 30H */ ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); - return (ver >= 0x30) ? true : false; + return (ver == 0x21 || ver >= 0x30); case SPEED_100: case SPEED_1000: return true; @@ -2806,9 +2806,10 @@ static bool xgbe_phy_valid_speed_sfp_mode(struct xgbe_prv_data *pdata, switch (speed) { case SPEED_10: - /* Supported in ver >= 30H */ + /* Supported in ver 21H and ver >= 30H */ ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); - return (ver >= 0x30) && (phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000); + return ((ver == 0x21 || ver >= 0x30) && + (phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000)); case SPEED_100: return (phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000); case SPEED_1000: @@ -3158,9 +3159,9 @@ static bool xgbe_phy_port_mode_mismatch(struct xgbe_prv_data *pdata) struct xgbe_phy_data *phy_data = pdata->phy_data; unsigned int ver; - /* 10 Mbps speed is not supported in ver < 30H */ + /* 10 Mbps speed is supported in ver 21H and ver >= 30H */ ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); - if (ver < 0x30 && (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10)) + if ((ver < 0x30 && ver != 0x21) && (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10)) return true; switch (phy_data->port_mode) { diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c index 7eb5851eb95d..6afff8af5e86 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c @@ -289,7 +289,7 @@ static int aq_get_txsc_stats(struct aq_hw_s *hw, const int sc_idx, static int aq_mdo_dev_open(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); int ret = 0; if (netif_carrier_ok(nic->ndev)) @@ -300,7 +300,7 @@ static int aq_mdo_dev_open(struct macsec_context *ctx) static int aq_mdo_dev_stop(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); int i; for (i = 0; i < AQ_MACSEC_MAX_SC; i++) { @@ -439,7 +439,7 @@ static enum aq_macsec_sc_sa sc_sa_from_num_an(const int num_an) static int aq_mdo_add_secy(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; const struct macsec_secy *secy = ctx->secy; enum aq_macsec_sc_sa sc_sa; @@ -474,7 +474,7 @@ static int aq_mdo_add_secy(struct macsec_context *ctx) static int aq_mdo_upd_secy(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); const struct macsec_secy *secy = ctx->secy; int txsc_idx; int ret = 0; @@ -528,7 +528,7 @@ static int aq_clear_txsc(struct aq_nic_s *nic, const int txsc_idx, static int aq_mdo_del_secy(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); int ret = 0; if (!nic->macsec_cfg) @@ -576,7 +576,7 @@ static int aq_update_txsa(struct aq_nic_s *nic, const unsigned int sc_idx, static int aq_mdo_add_txsa(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; const struct macsec_secy *secy = ctx->secy; struct aq_macsec_txsc *aq_txsc; @@ -603,7 +603,7 @@ static int aq_mdo_add_txsa(struct macsec_context *ctx) static int aq_mdo_upd_txsa(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; const struct macsec_secy *secy = ctx->secy; struct aq_macsec_txsc *aq_txsc; @@ -652,7 +652,7 @@ static int aq_clear_txsa(struct aq_nic_s *nic, struct aq_macsec_txsc *aq_txsc, static int aq_mdo_del_txsa(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; int txsc_idx; int ret = 0; @@ -744,7 +744,7 @@ static int aq_set_rxsc(struct aq_nic_s *nic, const u32 rxsc_idx) static int aq_mdo_add_rxsc(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; const u32 rxsc_idx_max = aq_sc_idx_max(cfg->sc_sa); u32 rxsc_idx; @@ -775,7 +775,7 @@ static int aq_mdo_add_rxsc(struct macsec_context *ctx) static int aq_mdo_upd_rxsc(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); int rxsc_idx; int ret = 0; @@ -838,7 +838,7 @@ static int aq_clear_rxsc(struct aq_nic_s *nic, const int rxsc_idx, static int aq_mdo_del_rxsc(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); enum aq_clear_type clear_type = AQ_CLEAR_SW; int rxsc_idx; int ret = 0; @@ -906,8 +906,8 @@ static int aq_update_rxsa(struct aq_nic_s *nic, const unsigned int sc_idx, static int aq_mdo_add_rxsa(struct macsec_context *ctx) { + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); const struct macsec_rx_sc *rx_sc = ctx->sa.rx_sa->sc; - struct aq_nic_s *nic = netdev_priv(ctx->netdev); const struct macsec_secy *secy = ctx->secy; struct aq_macsec_rxsc *aq_rxsc; int rxsc_idx; @@ -933,8 +933,8 @@ static int aq_mdo_add_rxsa(struct macsec_context *ctx) static int aq_mdo_upd_rxsa(struct macsec_context *ctx) { + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); const struct macsec_rx_sc *rx_sc = ctx->sa.rx_sa->sc; - struct aq_nic_s *nic = netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; const struct macsec_secy *secy = ctx->secy; int rxsc_idx; @@ -982,8 +982,8 @@ static int aq_clear_rxsa(struct aq_nic_s *nic, struct aq_macsec_rxsc *aq_rxsc, static int aq_mdo_del_rxsa(struct macsec_context *ctx) { + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); const struct macsec_rx_sc *rx_sc = ctx->sa.rx_sa->sc; - struct aq_nic_s *nic = netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; int rxsc_idx; int ret = 0; @@ -1000,7 +1000,7 @@ static int aq_mdo_del_rxsa(struct macsec_context *ctx) static int aq_mdo_get_dev_stats(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_common_stats *stats = &nic->macsec_cfg->stats; struct aq_hw_s *hw = nic->aq_hw; @@ -1020,7 +1020,7 @@ static int aq_mdo_get_dev_stats(struct macsec_context *ctx) static int aq_mdo_get_tx_sc_stats(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_tx_sc_stats *stats; struct aq_hw_s *hw = nic->aq_hw; struct aq_macsec_txsc *aq_txsc; @@ -1044,7 +1044,7 @@ static int aq_mdo_get_tx_sc_stats(struct macsec_context *ctx) static int aq_mdo_get_tx_sa_stats(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; struct aq_macsec_tx_sa_stats *stats; struct aq_hw_s *hw = nic->aq_hw; @@ -1084,7 +1084,7 @@ static int aq_mdo_get_tx_sa_stats(struct macsec_context *ctx) static int aq_mdo_get_rx_sc_stats(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; struct aq_macsec_rx_sa_stats *stats; struct aq_hw_s *hw = nic->aq_hw; @@ -1129,7 +1129,7 @@ static int aq_mdo_get_rx_sc_stats(struct macsec_context *ctx) static int aq_mdo_get_rx_sa_stats(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; struct aq_macsec_rx_sa_stats *stats; struct aq_hw_s *hw = nic->aq_hw; @@ -1399,7 +1399,7 @@ static void aq_check_txsa_expiration(struct aq_nic_s *nic) #define AQ_LOCKED_MDO_DEF(mdo) \ static int aq_locked_mdo_##mdo(struct macsec_context *ctx) \ { \ - struct aq_nic_s *nic = netdev_priv(ctx->netdev); \ + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); \ int ret; \ mutex_lock(&nic->macsec_mutex); \ ret = aq_mdo_##mdo(ctx); \ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 7f933175cbda..4de22eed099a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -532,10 +532,10 @@ static bool aq_add_rx_fragment(struct device *dev, buff_->rxdata.pg_off, buff_->len, DMA_FROM_DEVICE); - skb_frag_off_set(frag, buff_->rxdata.pg_off); - skb_frag_size_set(frag, buff_->len); sinfo->xdp_frags_size += buff_->len; - __skb_frag_set_page(frag, buff_->rxdata.page); + skb_frag_fill_page_desc(frag, buff_->rxdata.page, + buff_->rxdata.pg_off, + buff_->len); buff_->is_cleaned = 1; diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h index d820ae03a966..0e244f0e25fd 100644 --- a/drivers/net/ethernet/arc/emac.h +++ b/drivers/net/ethernet/arc/emac.h @@ -220,6 +220,6 @@ static inline void arc_reg_clr(struct arc_emac_priv *priv, int reg, int mask) int arc_mdio_probe(struct arc_emac_priv *priv); int arc_mdio_remove(struct arc_emac_priv *priv); int arc_emac_probe(struct net_device *ndev, int interface); -int arc_emac_remove(struct net_device *ndev); +void arc_emac_remove(struct net_device *ndev); #endif /* ARC_EMAC_H */ diff --git a/drivers/net/ethernet/arc/emac_arc.c b/drivers/net/ethernet/arc/emac_arc.c index 800620b8f10d..ce3147e886a1 100644 --- a/drivers/net/ethernet/arc/emac_arc.c +++ b/drivers/net/ethernet/arc/emac_arc.c @@ -61,11 +61,11 @@ out_netdev: static int emac_arc_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); - int err; - err = arc_emac_remove(ndev); + arc_emac_remove(ndev); free_netdev(ndev); - return err; + + return 0; } static const struct of_device_id emac_arc_dt_ids[] = { diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index ba0646b3b122..2b427d8a1831 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -1008,7 +1008,7 @@ out_put_node: } EXPORT_SYMBOL_GPL(arc_emac_probe); -int arc_emac_remove(struct net_device *ndev) +void arc_emac_remove(struct net_device *ndev) { struct arc_emac_priv *priv = netdev_priv(ndev); @@ -1019,8 +1019,6 @@ int arc_emac_remove(struct net_device *ndev) if (!IS_ERR(priv->clk)) clk_disable_unprepare(priv->clk); - - return 0; } EXPORT_SYMBOL_GPL(arc_emac_remove); diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c index 1c9ca3bcb871..509101112279 100644 --- a/drivers/net/ethernet/arc/emac_rockchip.c +++ b/drivers/net/ethernet/arc/emac_rockchip.c @@ -248,9 +248,8 @@ static int emac_rockchip_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct rockchip_priv_data *priv = netdev_priv(ndev); - int err; - err = arc_emac_remove(ndev); + arc_emac_remove(ndev); clk_disable_unprepare(priv->refclk); @@ -261,7 +260,7 @@ static int emac_rockchip_remove(struct platform_device *pdev) clk_disable_unprepare(priv->macclk); free_netdev(ndev); - return err; + return 0; } static struct platform_driver emac_rockchip_driver = { diff --git a/drivers/net/ethernet/atheros/alx/ethtool.c b/drivers/net/ethernet/atheros/alx/ethtool.c index b716adacd815..7f6b69a52367 100644 --- a/drivers/net/ethernet/atheros/alx/ethtool.c +++ b/drivers/net/ethernet/atheros/alx/ethtool.c @@ -292,9 +292,8 @@ static void alx_get_ethtool_stats(struct net_device *netdev, spin_lock(&alx->stats_lock); alx_update_hw_stats(hw); - BUILD_BUG_ON(sizeof(hw->stats) - offsetof(struct alx_hw_stats, rx_ok) < - ALX_NUM_STATS * sizeof(u64)); - memcpy(data, &hw->stats.rx_ok, ALX_NUM_STATS * sizeof(u64)); + BUILD_BUG_ON(sizeof(hw->stats) != ALX_NUM_STATS * sizeof(u64)); + memcpy(data, &hw->stats, sizeof(hw->stats)); spin_unlock(&alx->stats_lock); } diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 4a288799633f..940c5d1ff9cf 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2094,8 +2094,11 @@ static int atl1c_tso_csum(struct atl1c_adapter *adapter, real_len = (((unsigned char *)ip_hdr(skb) - skb->data) + ntohs(ip_hdr(skb)->tot_len)); - if (real_len < skb->len) - pskb_trim(skb, real_len); + if (real_len < skb->len) { + err = pskb_trim(skb, real_len); + if (err) + return err; + } hdr_len = skb_tcp_all_headers(skb); if (unlikely(skb->len == hdr_len)) { diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 5db0f3495a32..5935be190b9e 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1641,8 +1641,11 @@ static int atl1e_tso_csum(struct atl1e_adapter *adapter, real_len = (((unsigned char *)ip_hdr(skb) - skb->data) + ntohs(ip_hdr(skb)->tot_len)); - if (real_len < skb->len) - pskb_trim(skb, real_len); + if (real_len < skb->len) { + err = pskb_trim(skb, real_len); + if (err) + return err; + } hdr_len = skb_tcp_all_headers(skb); if (unlikely(skb->len == hdr_len)) { diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index c8444bcdf527..02aa6fd8ebc2 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2113,8 +2113,11 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb, real_len = (((unsigned char *)iph - skb->data) + ntohs(iph->tot_len)); - if (real_len < skb->len) - pskb_trim(skb, real_len); + if (real_len < skb->len) { + err = pskb_trim(skb, real_len); + if (err) + return err; + } hdr_len = skb_tcp_all_headers(skb); if (skb->len == hdr_len) { iph->check = 0; diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 392ec09a1d8a..3e4fb3c3e834 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1793,11 +1793,9 @@ static int b44_nway_reset(struct net_device *dev) b44_readphy(bp, MII_BMCR, &bmcr); b44_readphy(bp, MII_BMCR, &bmcr); r = -EINVAL; - if (bmcr & BMCR_ANENABLE) { - b44_writephy(bp, MII_BMCR, - bmcr | BMCR_ANRESTART); - r = 0; - } + if (bmcr & BMCR_ANENABLE) + r = b44_writephy(bp, MII_BMCR, + bmcr | BMCR_ANRESTART); spin_unlock_irq(&bp->lock); return r; diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 1761df8fb7f9..52ee3751187a 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1448,7 +1448,7 @@ int bgmac_phy_connect_direct(struct bgmac *bgmac) int err; phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); - if (!phy_dev || IS_ERR(phy_dev)) { + if (IS_ERR(phy_dev)) { dev_err(bgmac->dev, "Failed to register fixed PHY device\n"); return -ENODEV; } @@ -1492,8 +1492,6 @@ int bgmac_enet_probe(struct bgmac *bgmac) bgmac->in_init = true; - bgmac_chip_intrs_off(bgmac); - net_dev->irq = bgmac->irq; SET_NETDEV_DEV(net_dev, bgmac->dev); dev_set_drvdata(bgmac->dev, bgmac); @@ -1511,6 +1509,8 @@ int bgmac_enet_probe(struct bgmac *bgmac) */ bgmac_clk_enable(bgmac, 0); + bgmac_chip_intrs_off(bgmac); + /* This seems to be fixing IRQ by assigning OOB #6 to the core */ if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) { if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 466e1d62bcf6..0d917a9699c5 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -2955,7 +2955,6 @@ bnx2_reuse_rx_skb_pages(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, shinfo = skb_shinfo(skb); shinfo->nr_frags--; page = skb_frag_page(&shinfo->frags[shinfo->nr_frags]); - __skb_frag_set_page(&shinfo->frags[shinfo->nr_frags], NULL); cons_rx_pg->page = page; dev_kfree_skb(skb); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 8bcde0a6e011..e2a4e1088b7f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1508,6 +1508,8 @@ struct bnx2x { bool cnic_loaded; struct cnic_eth_dev *(*cnic_probe)(struct net_device *); + bool nic_stopped; + /* Flag that indicates that we can start looking for FCoE L2 queue * completions in the default status block. */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 6ea5521074d3..e9c1e1bb5580 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2715,6 +2715,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) bnx2x_add_all_napi(bp); DP(NETIF_MSG_IFUP, "napi added\n"); bnx2x_napi_enable(bp); + bp->nic_stopped = false; if (IS_PF(bp)) { /* set pf load just before approaching the MCP */ @@ -2960,6 +2961,7 @@ load_error2: load_error1: bnx2x_napi_disable(bp); bnx2x_del_all_napi(bp); + bp->nic_stopped = true; /* clear pf_load status, as it was already set */ if (IS_PF(bp)) @@ -3095,14 +3097,17 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) if (!CHIP_IS_E1x(bp)) bnx2x_pf_disable(bp); - /* Disable HW interrupts, NAPI */ - bnx2x_netif_stop(bp, 1); - /* Delete all NAPI objects */ - bnx2x_del_all_napi(bp); - if (CNIC_LOADED(bp)) - bnx2x_del_all_napi_cnic(bp); - /* Release IRQs */ - bnx2x_free_irq(bp); + if (!bp->nic_stopped) { + /* Disable HW interrupts, NAPI */ + bnx2x_netif_stop(bp, 1); + /* Delete all NAPI objects */ + bnx2x_del_all_napi(bp); + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); + /* Release IRQs */ + bnx2x_free_irq(bp); + bp->nic_stopped = true; + } /* Report UNLOAD_DONE to MCP */ bnx2x_send_unload_done(bp, false); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 1e7a6f1d4223..0d8e61c63c7c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9474,15 +9474,18 @@ unload_error: } } - /* Disable HW interrupts, NAPI */ - bnx2x_netif_stop(bp, 1); - /* Delete all NAPI objects */ - bnx2x_del_all_napi(bp); - if (CNIC_LOADED(bp)) - bnx2x_del_all_napi_cnic(bp); + if (!bp->nic_stopped) { + /* Disable HW interrupts, NAPI */ + bnx2x_netif_stop(bp, 1); + /* Delete all NAPI objects */ + bnx2x_del_all_napi(bp); + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); - /* Release IRQs */ - bnx2x_free_irq(bp); + /* Release IRQs */ + bnx2x_free_irq(bp); + bp->nic_stopped = true; + } /* Reset the chip, unless PCI function is offline. If we reach this * point following a PCI error handling, it means device is really @@ -14238,13 +14241,16 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev) } bnx2x_drain_tx_queues(bp); bnx2x_send_unload_req(bp, UNLOAD_RECOVERY); - bnx2x_netif_stop(bp, 1); - bnx2x_del_all_napi(bp); + if (!bp->nic_stopped) { + bnx2x_netif_stop(bp, 1); + bnx2x_del_all_napi(bp); - if (CNIC_LOADED(bp)) - bnx2x_del_all_napi_cnic(bp); + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); - bnx2x_free_irq(bp); + bnx2x_free_irq(bp); + bp->nic_stopped = true; + } /* Report UNLOAD_DONE to MCP */ bnx2x_send_unload_done(bp, true); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 0657a0f5170f..8946a931e87e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -529,13 +529,16 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp) bnx2x_vfpf_finalize(bp, &req->first_tlv); free_irq: - /* Disable HW interrupts, NAPI */ - bnx2x_netif_stop(bp, 0); - /* Delete all NAPI objects */ - bnx2x_del_all_napi(bp); - - /* Release IRQs */ - bnx2x_free_irq(bp); + if (!bp->nic_stopped) { + /* Disable HW interrupts, NAPI */ + bnx2x_netif_stop(bp, 0); + /* Delete all NAPI objects */ + bnx2x_del_all_napi(bp); + + /* Release IRQs */ + bnx2x_free_irq(bp); + bp->nic_stopped = true; + } } static void bnx2x_leading_vfq_init(struct bnx2x *bp, struct bnx2x_virtf *vf, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b499bc9c4e06..1eb490c48c52 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -633,12 +633,13 @@ tx_kick_pending: return NETDEV_TX_OK; } -static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) +static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) { struct bnxt_tx_ring_info *txr = bnapi->tx_ring; struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txr->txq_index); u16 cons = txr->tx_cons; struct pci_dev *pdev = bp->pdev; + int nr_pkts = bnapi->tx_pkts; int i; unsigned int tx_bytes = 0; @@ -688,6 +689,7 @@ next_tx_int: dev_kfree_skb_any(skb); } + bnapi->tx_pkts = 0; WRITE_ONCE(txr->tx_cons, cons); __netif_txq_completed_wake(txq, nr_pkts, tx_bytes, @@ -697,17 +699,24 @@ next_tx_int: static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, struct bnxt_rx_ring_info *rxr, + unsigned int *offset, gfp_t gfp) { struct device *dev = &bp->pdev->dev; struct page *page; - page = page_pool_dev_alloc_pages(rxr->page_pool); + if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) { + page = page_pool_dev_alloc_frag(rxr->page_pool, offset, + BNXT_RX_PAGE_SIZE); + } else { + page = page_pool_dev_alloc_pages(rxr->page_pool); + *offset = 0; + } if (!page) return NULL; - *mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); + *mapping = dma_map_page_attrs(dev, page, *offset, BNXT_RX_PAGE_SIZE, + bp->rx_dir, DMA_ATTR_WEAK_ORDERING); if (dma_mapping_error(dev, *mapping)) { page_pool_recycle_direct(rxr->page_pool, page); return NULL; @@ -747,15 +756,16 @@ int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, dma_addr_t mapping; if (BNXT_RX_PAGE_MODE(bp)) { + unsigned int offset; struct page *page = - __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp); + __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp); if (!page) return -ENOMEM; mapping += bp->rx_dma_offset; rx_buf->data = page; - rx_buf->data_ptr = page_address(page) + bp->rx_offset; + rx_buf->data_ptr = page_address(page) + offset + bp->rx_offset; } else { u8 *data = __bnxt_alloc_rx_frag(bp, &mapping, gfp); @@ -815,7 +825,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp, unsigned int offset = 0; if (BNXT_RX_PAGE_MODE(bp)) { - page = __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp); + page = __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp); if (!page) return -ENOMEM; @@ -962,15 +972,15 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, return NULL; } dma_addr -= bp->rx_dma_offset; - dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); - skb = build_skb(page_address(page), PAGE_SIZE); + dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, BNXT_RX_PAGE_SIZE, + bp->rx_dir, DMA_ATTR_WEAK_ORDERING); + skb = build_skb(data_ptr - bp->rx_offset, BNXT_RX_PAGE_SIZE); if (!skb) { page_pool_recycle_direct(rxr->page_pool, page); return NULL; } skb_mark_for_recycle(skb); - skb_reserve(skb, bp->rx_dma_offset); + skb_reserve(skb, bp->rx_offset); __skb_put(skb, len); return skb; @@ -996,8 +1006,8 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, return NULL; } dma_addr -= bp->rx_dma_offset; - dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); + dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, BNXT_RX_PAGE_SIZE, + bp->rx_dir, DMA_ATTR_WEAK_ORDERING); if (unlikely(!payload)) payload = eth_get_headlen(bp->dev, data_ptr, len); @@ -1010,7 +1020,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, skb_mark_for_recycle(skb); off = (void *)data_ptr - page_address(page); - skb_add_rx_frag(skb, 0, page, off, len, PAGE_SIZE); + skb_add_rx_frag(skb, 0, page, off, len, BNXT_RX_PAGE_SIZE); memcpy(skb->data - NET_IP_ALIGN, data_ptr - NET_IP_ALIGN, payload + NET_IP_ALIGN); @@ -1085,9 +1095,8 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp, RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT; cons_rx_buf = &rxr->rx_agg_ring[cons]; - skb_frag_off_set(frag, cons_rx_buf->offset); - skb_frag_size_set(frag, frag_len); - __skb_frag_set_page(frag, cons_rx_buf->page); + skb_frag_fill_page_desc(frag, cons_rx_buf->page, + cons_rx_buf->offset, frag_len); shinfo->nr_frags = i + 1; __clear_bit(cons, rxr->rx_agg_bmap); @@ -1103,10 +1112,7 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp, xdp_buff_set_frag_pfmemalloc(xdp); if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_ATOMIC) != 0) { - unsigned int nr_frags; - - nr_frags = --shinfo->nr_frags; - __skb_frag_set_page(&shinfo->frags[nr_frags], NULL); + --shinfo->nr_frags; cons_rx_buf->page = page; /* Update prod since possibly some pages have been @@ -1145,7 +1151,7 @@ static struct sk_buff *bnxt_rx_agg_pages_skb(struct bnxt *bp, skb->data_len += total_frag_len; skb->len += total_frag_len; - skb->truesize += PAGE_SIZE * agg_bufs; + skb->truesize += BNXT_RX_PAGE_SIZE * agg_bufs; return skb; } @@ -2573,12 +2579,11 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, return rx_pkts; } -static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi) +static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi, + int budget) { - if (bnapi->tx_pkts) { - bnapi->tx_int(bp, bnapi, bnapi->tx_pkts); - bnapi->tx_pkts = 0; - } + if (bnapi->tx_pkts) + bnapi->tx_int(bp, bnapi, budget); if ((bnapi->events & BNXT_RX_EVENT) && !(bnapi->in_reset)) { struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; @@ -2607,7 +2612,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, */ bnxt_db_cq(bp, &cpr->cp_db, cpr->cp_raw_cons); - __bnxt_poll_work_done(bp, bnapi); + __bnxt_poll_work_done(bp, bnapi, budget); return rx_pkts; } @@ -2738,7 +2743,7 @@ static int __bnxt_poll_cqs(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) } static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi, - u64 dbr_type) + u64 dbr_type, int budget) { struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; int i; @@ -2754,7 +2759,7 @@ static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi, cpr2->had_work_done = 0; } } - __bnxt_poll_work_done(bp, bnapi); + __bnxt_poll_work_done(bp, bnapi, budget); } static int bnxt_poll_p5(struct napi_struct *napi, int budget) @@ -2784,7 +2789,8 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget) if (cpr->has_more_work) break; - __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ_ARMALL); + __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ_ARMALL, + budget); cpr->cp_raw_cons = raw_cons; if (napi_complete_done(napi, work_done)) BNXT_DB_NQ_ARM_P5(&cpr->cp_db, @@ -2814,7 +2820,7 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget) } raw_cons = NEXT_RAW_CMP(raw_cons); } - __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ); + __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ, budget); if (raw_cons != cpr->cp_raw_cons) { cpr->cp_raw_cons = raw_cons; BNXT_DB_NQ_P5(&cpr->cp_db, raw_cons); @@ -2947,8 +2953,8 @@ skip_rx_tpa_free: rx_buf->data = NULL; if (BNXT_RX_PAGE_MODE(bp)) { mapping -= bp->rx_dma_offset; - dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE, - bp->rx_dir, + dma_unmap_page_attrs(&pdev->dev, mapping, + BNXT_RX_PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); page_pool_recycle_direct(rxr->page_pool, data); } else { @@ -3217,6 +3223,8 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, pp.napi = &rxr->bnapi->napi; pp.dev = &bp->pdev->dev; pp.dma_dir = DMA_BIDIRECTIONAL; + if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) + pp.flags |= PP_FLAG_PAGE_FRAG; rxr->page_pool = page_pool_create(&pp); if (IS_ERR(rxr->page_pool)) { @@ -3993,26 +4001,29 @@ void bnxt_set_ring_params(struct bnxt *bp) */ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) { + struct net_device *dev = bp->dev; + if (page_mode) { bp->flags &= ~BNXT_FLAG_AGG_RINGS; bp->flags |= BNXT_FLAG_RX_PAGE_MODE; - if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) { + if (bp->xdp_prog->aux->xdp_has_frags) + dev->max_mtu = min_t(u16, bp->max_mtu, BNXT_MAX_MTU); + else + dev->max_mtu = + min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU); + if (dev->mtu > BNXT_MAX_PAGE_MODE_MTU) { bp->flags |= BNXT_FLAG_JUMBO; bp->rx_skb_func = bnxt_rx_multi_page_skb; - bp->dev->max_mtu = - min_t(u16, bp->max_mtu, BNXT_MAX_MTU); } else { bp->flags |= BNXT_FLAG_NO_AGG_RINGS; bp->rx_skb_func = bnxt_rx_page_skb; - bp->dev->max_mtu = - min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU); } bp->rx_dir = DMA_BIDIRECTIONAL; /* Disable LRO or GRO_HW */ - netdev_update_features(bp->dev); + netdev_update_features(dev); } else { - bp->dev->max_mtu = bp->max_mtu; + dev->max_mtu = bp->max_mtu; bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE; bp->rx_dir = DMA_FROM_DEVICE; bp->rx_skb_func = bnxt_rx_skb; @@ -9433,6 +9444,8 @@ static void bnxt_enable_napi(struct bnxt *bp) cpr->sw_stats.rx.rx_resets++; bnapi->in_reset = false; + bnapi->tx_pkts = 0; + if (bnapi->rx_ring) { INIT_WORK(&cpr->dim.work, bnxt_dim_work); cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 080e73496066..bb95c3dc5270 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1005,7 +1005,7 @@ struct bnxt_napi { struct bnxt_tx_ring_info *tx_ring; void (*tx_int)(struct bnxt *, struct bnxt_napi *, - int); + int budget); int tx_pkts; u8 events; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index b31de4cf6534..a2d3a80236c4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -3721,6 +3721,60 @@ struct hwrm_func_backing_store_qcaps_v2_output { u8 valid; }; +/* hwrm_func_dbr_pacing_qcfg_input (size:128b/16B) */ +struct hwrm_func_dbr_pacing_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_func_dbr_pacing_qcfg_output (size:512b/64B) */ +struct hwrm_func_dbr_pacing_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 flags; +#define FUNC_DBR_PACING_QCFG_RESP_FLAGS_DBR_NQ_EVENT_ENABLED 0x1UL + u8 unused_0[7]; + __le32 dbr_stat_db_fifo_reg; +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK 0x3UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_SFT 0 +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_PCIE_CFG 0x0UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC 0x1UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_BAR0 0x2UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_BAR1 0x3UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_LAST \ + FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_BAR1 +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_MASK 0xfffffffcUL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SFT 2 + __le32 dbr_stat_db_fifo_reg_watermark_mask; + u8 dbr_stat_db_fifo_reg_watermark_shift; + u8 unused_1[3]; + __le32 dbr_stat_db_fifo_reg_fifo_room_mask; + u8 dbr_stat_db_fifo_reg_fifo_room_shift; + u8 unused_2[3]; + __le32 dbr_throttling_aeq_arm_reg; +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_MASK 0x3UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_SFT 0 +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_PCIE_CFG 0x0UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_GRC 0x1UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_BAR0 0x2UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_BAR1 0x3UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_LAST \ + FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_BAR1 +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_MASK 0xfffffffcUL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SFT 2 + u8 dbr_throttling_aeq_arm_reg_val; + u8 unused_3[7]; + __le32 primary_nq_id; + __le32 pacing_threshold; + u8 unused_4[7]; + u8 valid; +}; + /* hwrm_func_drv_if_change_input (size:192b/24B) */ struct hwrm_func_drv_if_change_input { __le16 req_type; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 852eb449ccae..6ba2b9398633 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -345,7 +345,7 @@ static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp) edev->hw_ring_stats_size = bp->hw_ring_stats_size; edev->pf_port_id = bp->pf.port_id; edev->en_state = bp->state; - + edev->bar0 = bp->bar0; edev->ulp_tbl->msix_requested = bnxt_get_ulp_msix_num(bp); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index 80cbc4b6130a..6ff77f082e6c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -81,6 +81,7 @@ struct bnxt_en_dev { * mode only. Will be * updated in resume. */ + void __iomem *bar0; }; static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index 2f1a1f2d2157..1467b94a6427 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -468,6 +468,7 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep, struct net_device *pf_dev = bp->dev; u16 max_mtu; + SET_NETDEV_DEV(dev, &bp->pdev->dev); dev->netdev_ops = &bnxt_vf_rep_netdev_ops; dev->ethtool_ops = &bnxt_vf_rep_ethtool_ops; /* Just inherit all the featues of the parent PF as the VF-R diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 4efa5fe6972b..fb43232310b2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -125,16 +125,20 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp, dma_unmap_len_set(tx_buf, len, 0); } -void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) +void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) { struct bnxt_tx_ring_info *txr = bnapi->tx_ring; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; bool rx_doorbell_needed = false; + int nr_pkts = bnapi->tx_pkts; struct bnxt_sw_tx_bd *tx_buf; u16 tx_cons = txr->tx_cons; u16 last_tx_cons = tx_cons; int i, j, frags; + if (!budget) + return; + for (i = 0; i < nr_pkts; i++) { tx_buf = &txr->tx_buf_ring[tx_cons]; @@ -161,6 +165,8 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) } tx_cons = NEXT_TX(tx_cons); } + + bnapi->tx_pkts = 0; WRITE_ONCE(txr->tx_cons, tx_cons); if (rx_doorbell_needed) { tx_buf = &txr->tx_buf_ring[last_tx_cons]; @@ -180,8 +186,8 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, u8 *data_ptr, unsigned int len, struct xdp_buff *xdp) { + u32 buflen = BNXT_RX_PAGE_SIZE; struct bnxt_sw_rx_bd *rx_buf; - u32 buflen = PAGE_SIZE; struct pci_dev *pdev; dma_addr_t mapping; u32 offset; @@ -297,7 +303,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, rx_buf = &rxr->rx_buf_ring[cons]; mapping = rx_buf->mapping - bp->rx_dma_offset; dma_unmap_page_attrs(&pdev->dev, mapping, - PAGE_SIZE, bp->rx_dir, + BNXT_RX_PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); /* if we are unable to allocate a new buffer, abort and reuse */ @@ -480,7 +486,7 @@ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, } xdp_update_skb_shared_info(skb, num_frags, sinfo->xdp_frags_size, - PAGE_SIZE * sinfo->nr_frags, + BNXT_RX_PAGE_SIZE * sinfo->nr_frags, xdp_buff_is_frag_pfmemalloc(xdp)); return skb; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index ea430d6961df..5e412c5655ba 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -16,7 +16,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct bnxt_tx_ring_info *txr, dma_addr_t mapping, u32 len, struct xdp_buff *xdp); -void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts); +void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget); bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, struct xdp_buff xdp, struct page *page, u8 **data_ptr, unsigned int *len, u8 *event); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 3a4b6cb7b7b9..7a41cad5788f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -42,6 +42,12 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; + if (dev->phydev) { + phy_ethtool_get_wol(dev->phydev, wol); + if (wol->supported) + return; + } + if (!device_can_wakeup(kdev)) { wol->supported = 0; wol->wolopts = 0; @@ -63,6 +69,14 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; + int ret; + + /* Try Wake-on-LAN from the PHY first */ + if (dev->phydev) { + ret = phy_ethtool_set_wol(dev->phydev, wol); + if (ret != -EOPNOTSUPP) + return ret; + } if (!device_can_wakeup(kdev)) return -ENOTSUPP; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index c15ed0acdb77..cc3afb605b1e 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -617,7 +617,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) }; phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); - if (!phydev || IS_ERR(phydev)) { + if (IS_ERR(phydev)) { dev_err(kdev, "failed to register fixed PHY device\n"); return -ENODEV; } @@ -673,5 +673,7 @@ void bcmgenet_mii_exit(struct net_device *dev) if (of_phy_is_fixed_link(dn)) of_phy_deregister_fixed_link(dn); of_node_put(priv->phy_dn); + clk_prepare_enable(priv->clk); platform_device_unregister(priv->mii_pdev); + clk_disable_unprepare(priv->clk); } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 58747292521d..cb2810f175cc 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -57,6 +57,7 @@ #include <linux/crc32poly.h> #include <net/checksum.h> +#include <net/gso.h> #include <net/ip.h> #include <linux/io.h> @@ -224,6 +225,7 @@ MODULE_AUTHOR("David S. Miller (davem@redhat.com) and Jeff Garzik (jgarzik@pobox MODULE_DESCRIPTION("Broadcom Tigon3 ethernet driver"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FIRMWARE_TG3); +MODULE_FIRMWARE(FIRMWARE_TG357766); MODULE_FIRMWARE(FIRMWARE_TG3TSO); MODULE_FIRMWARE(FIRMWARE_TG3TSO5); @@ -6879,7 +6881,10 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) ri->data = NULL; - skb = build_skb(data, frag_size); + if (frag_size) + skb = build_skb(data, frag_size); + else + skb = slab_build_skb(data); if (!skb) { tg3_frag_free(frag_size != 0, data); goto drop_it_no_recycle; diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c index 04ad0f2b9677..7246e13dd559 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c +++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c @@ -512,11 +512,6 @@ bnad_debugfs_init(struct bnad *bnad) if (!bnad->port_debugfs_root) { bnad->port_debugfs_root = debugfs_create_dir(name, bna_debugfs_root); - if (!bnad->port_debugfs_root) { - netdev_warn(bnad->netdev, - "debugfs root dir creation failed\n"); - return; - } atomic_inc(&bna_debugfs_port_count); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index cfbdd0022764..78c972bb1d96 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -82,6 +82,7 @@ #define GEM_NCFGR 0x0004 /* Network Config */ #define GEM_USRIO 0x000c /* User IO */ #define GEM_DMACFG 0x0010 /* DMA Configuration */ +#define GEM_PBUFRXCUT 0x0044 /* RX Partial Store and Forward */ #define GEM_JML 0x0048 /* Jumbo Max Length */ #define GEM_HS_MAC_CONFIG 0x0050 /* GEM high speed config */ #define GEM_HRB 0x0080 /* Hash Bottom */ @@ -347,6 +348,10 @@ #define GEM_ADDR64_SIZE 1 +/* Bitfields in PBUFRXCUT */ +#define GEM_ENCUTTHRU_OFFSET 31 /* Enable RX partial store and forward */ +#define GEM_ENCUTTHRU_SIZE 1 + /* Bitfields in NSR */ #define MACB_NSR_LINK_OFFSET 0 /* pcs_link_state */ #define MACB_NSR_LINK_SIZE 1 @@ -513,6 +518,8 @@ #define GEM_TX_PKT_BUFF_OFFSET 21 #define GEM_TX_PKT_BUFF_SIZE 1 +#define GEM_RX_PBUF_ADDR_OFFSET 22 +#define GEM_RX_PBUF_ADDR_SIZE 4 /* Bitfields in DCFG5. */ #define GEM_TSU_OFFSET 8 @@ -521,6 +528,8 @@ /* Bitfields in DCFG6. */ #define GEM_PBUF_LSO_OFFSET 27 #define GEM_PBUF_LSO_SIZE 1 +#define GEM_PBUF_CUTTHRU_OFFSET 25 +#define GEM_PBUF_CUTTHRU_SIZE 1 #define GEM_DAW64_OFFSET 23 #define GEM_DAW64_SIZE 1 @@ -1181,6 +1190,7 @@ struct macb_config { struct clk **hclk, struct clk **tx_clk, struct clk **rx_clk, struct clk **tsu_clk); int (*init)(struct platform_device *pdev); + unsigned int max_tx_length; int jumbo_max_len; const struct macb_usrio_config *usrio; }; @@ -1289,6 +1299,9 @@ struct macb { u32 wol; + /* holds value of rx watermark value for pbuf_rxcutthru register */ + u32 rx_watermark; + struct macb_ptp_info *ptp_info; /* macb-ptp interface */ struct phy *sgmii_phy; /* for ZynqMP SGMII mode */ diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 29a1199dad14..2d18acf3d5ea 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -563,7 +563,7 @@ static void macb_set_tx_clk(struct macb *bp, int speed) netdev_err(bp->dev, "adjusting tx_clk failed.\n"); } -static void macb_usx_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode, +static void macb_usx_pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex) { @@ -596,7 +596,7 @@ static void macb_usx_pcs_get_state(struct phylink_pcs *pcs, } static int macb_usx_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, + unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -621,7 +621,7 @@ static void macb_pcs_an_restart(struct phylink_pcs *pcs) } static int macb_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, + unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -757,8 +757,6 @@ static void macb_mac_link_up(struct phylink_config *config, if (rx_pause) ctrl |= MACB_BIT(PAE); - macb_set_tx_clk(bp, speed); - /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down * cleared the pipeline and control registers. */ @@ -778,6 +776,9 @@ static void macb_mac_link_up(struct phylink_config *config, spin_unlock_irqrestore(&bp->lock, flags); + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) + macb_set_tx_clk(bp, speed); + /* Enable Rx and Tx; Enable PTP unicast */ ctrl = macb_readl(bp, NCR); if (gem_has_ptp(bp)) @@ -862,7 +863,9 @@ static int macb_mii_probe(struct net_device *dev) struct macb *bp = netdev_priv(dev); bp->phylink_sgmii_pcs.ops = &macb_phylink_pcs_ops; + bp->phylink_sgmii_pcs.neg_mode = true; bp->phylink_usx_pcs.ops = &macb_phylink_usx_pcs_ops; + bp->phylink_usx_pcs.neg_mode = true; bp->phylink_config.dev = &dev->dev; bp->phylink_config.type = PHYLINK_NETDEV; @@ -2635,6 +2638,9 @@ static void macb_reset_hw(struct macb *bp) macb_writel(bp, TSR, -1); macb_writel(bp, RSR, -1); + /* Disable RX partial store and forward and reset watermark value */ + gem_writel(bp, PBUFRXCUT, 0); + /* Disable all interrupts */ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { queue_writel(queue, IDR, -1); @@ -2792,6 +2798,10 @@ static void macb_init_hw(struct macb *bp) bp->rx_frm_len_mask = MACB_RX_JFRMLEN_MASK; macb_configure_dma(bp); + + /* Enable RX partial store and forward and set watermark */ + if (bp->rx_watermark) + gem_writel(bp, PBUFRXCUT, (bp->rx_watermark | GEM_BIT(ENCUTTHRU))); } /* The hash address register is 64 bits long and takes up two @@ -4117,14 +4127,12 @@ static int macb_init(struct platform_device *pdev) /* setup appropriated routines according to adapter type */ if (macb_is_gem(bp)) { - bp->max_tx_length = GEM_MAX_TX_LEN; bp->macbgem_ops.mog_alloc_rx_buffers = gem_alloc_rx_buffers; bp->macbgem_ops.mog_free_rx_buffers = gem_free_rx_buffers; bp->macbgem_ops.mog_init_rings = gem_init_rings; bp->macbgem_ops.mog_rx = gem_rx; dev->ethtool_ops = &gem_ethtool_ops; } else { - bp->max_tx_length = MACB_MAX_TX_LEN; bp->macbgem_ops.mog_alloc_rx_buffers = macb_alloc_rx_buffers; bp->macbgem_ops.mog_free_rx_buffers = macb_free_rx_buffers; bp->macbgem_ops.mog_init_rings = macb_init_rings; @@ -4861,7 +4869,8 @@ static const struct macb_config mpfs_config = { .clk_init = macb_clk_init, .init = init_reset_optional, .usrio = &macb_default_usrio, - .jumbo_max_len = 10240, + .max_tx_length = 4040, /* Cadence Erratum 1686 */ + .jumbo_max_len = 4040, }; static const struct macb_config sama7g5_gem_config = { @@ -4947,6 +4956,7 @@ static int macb_probe(struct platform_device *pdev) phy_interface_t interface; struct net_device *dev; struct resource *regs; + u32 wtrmrk_rst_val; void __iomem *mem; struct macb *bp; int err, val; @@ -5012,6 +5022,13 @@ static int macb_probe(struct platform_device *pdev) if (macb_config) bp->jumbo_max_len = macb_config->jumbo_max_len; + if (!hw_is_gem(bp->regs, bp->native_io)) + bp->max_tx_length = MACB_MAX_TX_LEN; + else if (macb_config->max_tx_length) + bp->max_tx_length = macb_config->max_tx_length; + else + bp->max_tx_length = GEM_MAX_TX_LEN; + bp->wol = 0; if (of_property_read_bool(np, "magic-packet")) bp->wol |= MACB_WOL_HAS_MAGIC_PACKET; @@ -5019,6 +5036,25 @@ static int macb_probe(struct platform_device *pdev) bp->usrio = macb_config->usrio; + /* By default we set to partial store and forward mode for zynqmp. + * Disable if not set in devicetree. + */ + if (GEM_BFEXT(PBUF_CUTTHRU, gem_readl(bp, DCFG6))) { + err = of_property_read_u32(bp->pdev->dev.of_node, + "cdns,rx-watermark", + &bp->rx_watermark); + + if (!err) { + /* Disable partial store and forward in case of error or + * invalid watermark value + */ + wtrmrk_rst_val = (1 << (GEM_BFEXT(RX_PBUF_ADDR, gem_readl(bp, DCFG2)))) - 1; + if (bp->rx_watermark > wtrmrk_rst_val || !bp->rx_watermark) { + dev_info(&bp->pdev->dev, "Invalid watermark value\n"); + bp->rx_watermark = 0; + } + } + } spin_lock_init(&bp->lock); /* setup capabilities */ @@ -5159,6 +5195,9 @@ static int __maybe_unused macb_suspend(struct device *dev) unsigned int q; int err; + if (!device_may_wakeup(&bp->dev->dev)) + phy_exit(bp->sgmii_phy); + if (!netif_running(netdev)) return 0; @@ -5219,7 +5258,6 @@ static int __maybe_unused macb_suspend(struct device *dev) if (!(bp->wol & MACB_WOL_ENABLED)) { rtnl_lock(); phylink_stop(bp->phylink); - phy_exit(bp->sgmii_phy); rtnl_unlock(); spin_lock_irqsave(&bp->lock, flags); macb_reset_hw(bp); @@ -5249,6 +5287,9 @@ static int __maybe_unused macb_resume(struct device *dev) unsigned int q; int err; + if (!device_may_wakeup(&bp->dev->dev)) + phy_init(bp->sgmii_phy); + if (!netif_running(netdev)) return 0; @@ -5309,8 +5350,6 @@ static int __maybe_unused macb_resume(struct device *dev) macb_set_rx_mode(netdev); macb_restore_features(bp); rtnl_lock(); - if (!device_may_wakeup(&bp->dev->dev)) - phy_init(bp->sgmii_phy); phylink_start(bp->phylink); rtnl_unlock(); diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index 1c76c95b0b27..ca742cc146d7 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -62,6 +62,9 @@ config CAVIUM_PTP Precision Time Protocol or other purposes. Timestamps can be used in BGX, TNS, GTI, and NIC blocks. +config LIQUIDIO_CORE + tristate + config LIQUIDIO tristate "Cavium LiquidIO support" depends on 64BIT && PCI @@ -69,6 +72,7 @@ config LIQUIDIO depends on PTP_1588_CLOCK_OPTIONAL select FW_LOADER select LIBCRC32C + select LIQUIDIO_CORE select NET_DEVLINK help This driver supports Cavium LiquidIO Intelligent Server Adapters @@ -92,6 +96,7 @@ config LIQUIDIO_VF tristate "Cavium LiquidIO VF support" depends on 64BIT && PCI_MSI depends on PTP_1588_CLOCK_OPTIONAL + select LIQUIDIO_CORE help This driver supports Cavium LiquidIO Intelligent Server Adapter based on CN23XX chips. diff --git a/drivers/net/ethernet/cavium/liquidio/Makefile b/drivers/net/ethernet/cavium/liquidio/Makefile index bc9937502043..4ee80af88e79 100644 --- a/drivers/net/ethernet/cavium/liquidio/Makefile +++ b/drivers/net/ethernet/cavium/liquidio/Makefile @@ -3,7 +3,9 @@ # Cavium Liquidio ethernet device driver # -common-objs := lio_ethtool.o \ +obj-$(CONFIG_LIQUIDIO_CORE) += liquidio-core.o +liquidio-core-y := \ + lio_ethtool.o \ lio_core.o \ request_manager.o \ response_manager.o \ @@ -18,7 +20,7 @@ common-objs := lio_ethtool.o \ octeon_nic.o obj-$(CONFIG_LIQUIDIO) += liquidio.o -liquidio-y := lio_main.o octeon_console.o lio_vf_rep.o $(common-objs) +liquidio-y := lio_main.o octeon_console.o lio_vf_rep.o obj-$(CONFIG_LIQUIDIO_VF) += liquidio_vf.o -liquidio_vf-y := lio_vf_main.o $(common-objs) +liquidio_vf-y := lio_vf_main.o diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index 9ed3d1ab2ca5..068ed52b66c9 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -719,12 +719,10 @@ static int cn23xx_setup_pf_mbox(struct octeon_device *oct) for (i = 0; i < oct->sriov_info.max_vfs; i++) { q_no = i * oct->sriov_info.rings_per_vf; - mbox = vmalloc(sizeof(*mbox)); + mbox = vzalloc(sizeof(*mbox)); if (!mbox) goto free_mbox; - memset(mbox, 0, sizeof(struct octeon_mbox)); - spin_lock_init(&mbox->lock); mbox->oct_dev = oct; @@ -1377,6 +1375,7 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(setup_cn23xx_octeon_pf_device); int validate_cn23xx_pf_config_info(struct octeon_device *oct, struct octeon_config *conf23xx) @@ -1435,6 +1434,7 @@ int cn23xx_fw_loaded(struct octeon_device *oct) val = octeon_read_csr64(oct, CN23XX_SLI_SCRATCH2); return (val >> SCR2_BIT_FW_LOADED) & 1ULL; } +EXPORT_SYMBOL_GPL(cn23xx_fw_loaded); void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx, u8 *mac) @@ -1456,6 +1456,7 @@ void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx, octeon_mbox_write(oct, &mbox_cmd); } } +EXPORT_SYMBOL_GPL(cn23xx_tell_vf_its_macaddr_changed); static void cn23xx_get_vf_stats_callback(struct octeon_device *oct, @@ -1510,3 +1511,4 @@ int cn23xx_get_vf_stats(struct octeon_device *oct, int vfidx, return 0; } +EXPORT_SYMBOL_GPL(cn23xx_get_vf_stats); diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c index fda49404968c..dd5d80fee24f 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c @@ -279,12 +279,10 @@ static int cn23xx_setup_vf_mbox(struct octeon_device *oct) { struct octeon_mbox *mbox = NULL; - mbox = vmalloc(sizeof(*mbox)); + mbox = vzalloc(sizeof(*mbox)); if (!mbox) return 1; - memset(mbox, 0, sizeof(struct octeon_mbox)); - spin_lock_init(&mbox->lock); mbox->oct_dev = oct; @@ -386,6 +384,7 @@ void cn23xx_vf_ask_pf_to_do_flr(struct octeon_device *oct) octeon_mbox_write(oct, &mbox_cmd); } +EXPORT_SYMBOL_GPL(cn23xx_vf_ask_pf_to_do_flr); static void octeon_pfvf_hs_callback(struct octeon_device *oct, struct octeon_mbox_cmd *cmd, @@ -468,6 +467,7 @@ int cn23xx_octeon_pfvf_handshake(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(cn23xx_octeon_pfvf_handshake); static void cn23xx_handle_vf_mbox_intr(struct octeon_ioq_vector *ioq_vector) { @@ -680,3 +680,4 @@ int cn23xx_setup_octeon_vf_device(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(cn23xx_setup_octeon_vf_device); diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c index 39643be8c30a..93fccfec288d 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c @@ -697,6 +697,7 @@ int lio_setup_cn66xx_octeon_device(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(lio_setup_cn66xx_octeon_device); int lio_validate_cn6xxx_config_info(struct octeon_device *oct, struct octeon_config *conf6xxx) diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c index 30254e4cf70f..b5103def3761 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c @@ -181,3 +181,4 @@ int lio_setup_cn68xx_octeon_device(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(lio_setup_cn68xx_octeon_device); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index 882b2be06ea0..9cc6303c82ff 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -26,6 +26,9 @@ #include "octeon_main.h" #include "octeon_network.h" +MODULE_AUTHOR("Cavium Networks, <support@cavium.com>"); +MODULE_LICENSE("GPL"); + /* OOM task polling interval */ #define LIO_OOM_POLL_INTERVAL_MS 250 @@ -71,6 +74,7 @@ void lio_delete_glists(struct lio *lio) kfree(lio->glist); lio->glist = NULL; } +EXPORT_SYMBOL_GPL(lio_delete_glists); /** * lio_setup_glists - Setup gather lists @@ -154,6 +158,7 @@ int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs) return 0; } +EXPORT_SYMBOL_GPL(lio_setup_glists); int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1) { @@ -180,6 +185,7 @@ int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1) } return ret; } +EXPORT_SYMBOL_GPL(liquidio_set_feature); void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl, unsigned int bytes_compl) @@ -395,6 +401,7 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) nctrl->ncmd.s.cmd); } } +EXPORT_SYMBOL_GPL(liquidio_link_ctrl_cmd_completion); void octeon_pf_changed_vf_macaddr(struct octeon_device *oct, u8 *mac) { @@ -478,6 +485,7 @@ int setup_rx_oom_poll_fn(struct net_device *netdev) return 0; } +EXPORT_SYMBOL_GPL(setup_rx_oom_poll_fn); void cleanup_rx_oom_poll_fn(struct net_device *netdev) { @@ -495,6 +503,7 @@ void cleanup_rx_oom_poll_fn(struct net_device *netdev) } } } +EXPORT_SYMBOL_GPL(cleanup_rx_oom_poll_fn); /* Runs in interrupt context. */ static void lio_update_txq_status(struct octeon_device *oct, int iq_num) @@ -899,6 +908,7 @@ int liquidio_setup_io_queues(struct octeon_device *octeon_dev, int ifidx, return 0; } +EXPORT_SYMBOL_GPL(liquidio_setup_io_queues); static int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret) @@ -1194,6 +1204,7 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs) } return 0; } +EXPORT_SYMBOL_GPL(octeon_setup_interrupt); /** * liquidio_change_mtu - Net device change_mtu @@ -1256,6 +1267,7 @@ int liquidio_change_mtu(struct net_device *netdev, int new_mtu) WRITE_ONCE(sc->caller_is_done, true); return 0; } +EXPORT_SYMBOL_GPL(liquidio_change_mtu); int lio_wait_for_clean_oq(struct octeon_device *oct) { @@ -1279,6 +1291,7 @@ int lio_wait_for_clean_oq(struct octeon_device *oct) return pending_pkts; } +EXPORT_SYMBOL_GPL(lio_wait_for_clean_oq); static void octnet_nic_stats_callback(struct octeon_device *oct_dev, @@ -1509,6 +1522,7 @@ lio_fetch_stats_exit: return; } +EXPORT_SYMBOL_GPL(lio_fetch_stats); int liquidio_set_speed(struct lio *lio, int speed) { @@ -1659,6 +1673,7 @@ int liquidio_get_speed(struct lio *lio) return retval; } +EXPORT_SYMBOL_GPL(liquidio_get_speed); int liquidio_set_fec(struct lio *lio, int on_off) { @@ -1812,3 +1827,4 @@ int liquidio_get_fec(struct lio *lio) return retval; } +EXPORT_SYMBOL_GPL(liquidio_get_fec); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index 2c10ae3f7fc1..9d56181a301f 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -3180,3 +3180,4 @@ void liquidio_set_ethtool_ops(struct net_device *netdev) else netdev->ethtool_ops = &lio_ethtool_ops; } +EXPORT_SYMBOL_GPL(liquidio_set_ethtool_ops); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 9bd1d2d7027d..100daadbea2a 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -191,8 +191,7 @@ static void octeon_droq_bh(struct tasklet_struct *t) static int lio_wait_for_oq_pkts(struct octeon_device *oct) { - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; int retry = 100, pkt_cnt = 0, pending_pkts = 0; int i; @@ -950,8 +949,7 @@ static void octeon_destroy_resources(struct octeon_device *oct) { int i, refcount; struct msix_entry *msix_entries; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct handshake *hs; @@ -1211,8 +1209,7 @@ static int send_rx_ctrl_cmd(struct lio *lio, int start_stop) static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) { struct net_device *netdev = oct->props[ifidx].netdev; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct napi_struct *napi, *n; struct lio *lio; @@ -1774,8 +1771,7 @@ static int liquidio_open(struct net_device *netdev) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct napi_struct *napi, *n; int ret = 0; @@ -1855,8 +1851,7 @@ static int liquidio_stop(struct net_device *netdev) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct napi_struct *napi, *n; int ret = 0; @@ -4057,8 +4052,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev) char bootcmd[] = "\n"; char *dbg_enb = NULL; enum lio_fw_state fw_state; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)octeon_dev->priv; + struct octeon_device_priv *oct_priv = octeon_dev->priv; atomic_set(&octeon_dev->status, OCT_DEV_BEGIN_STATE); /* Enable access to the octeon device and make its DMA capability diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index e2921aec3da0..62c2eadc33e3 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -72,8 +72,7 @@ static int liquidio_stop(struct net_device *netdev); static int lio_wait_for_oq_pkts(struct octeon_device *oct) { - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; int retry = MAX_IO_PENDING_PKT_COUNT; int pkt_cnt = 0, pending_pkts; int i; @@ -442,8 +441,7 @@ static void octeon_pci_flr(struct octeon_device *oct) */ static void octeon_destroy_resources(struct octeon_device *oct) { - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct msix_entry *msix_entries; int i; @@ -659,8 +657,7 @@ static int send_rx_ctrl_cmd(struct lio *lio, int start_stop) static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) { struct net_device *netdev = oct->props[ifidx].netdev; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct napi_struct *napi, *n; struct lio *lio; @@ -909,8 +906,7 @@ static int liquidio_open(struct net_device *netdev) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct napi_struct *napi, *n; int ret = 0; @@ -956,8 +952,7 @@ static int liquidio_stop(struct net_device *netdev) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct napi_struct *napi, *n; int ret = 0; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index e159194d0aef..364f4f912dc2 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -564,6 +564,7 @@ void octeon_init_device_list(int conf_type) for (i = 0; i < MAX_OCTEON_DEVICES; i++) oct_set_config_info(i, conf_type); } +EXPORT_SYMBOL_GPL(octeon_init_device_list); static void *__retrieve_octeon_config_info(struct octeon_device *oct, u16 card_type) @@ -633,6 +634,7 @@ char *lio_get_state_string(atomic_t *state_ptr) return oct_dev_state_str[OCT_DEV_STATE_INVALID]; return oct_dev_state_str[istate]; } +EXPORT_SYMBOL_GPL(lio_get_state_string); static char *get_oct_app_string(u32 app_mode) { @@ -661,6 +663,7 @@ void octeon_free_device_mem(struct octeon_device *oct) octeon_device[i] = NULL; octeon_device_count--; } +EXPORT_SYMBOL_GPL(octeon_free_device_mem); static struct octeon_device *octeon_allocate_device_mem(u32 pci_id, u32 priv_size) @@ -747,6 +750,7 @@ struct octeon_device *octeon_allocate_device(u32 pci_id, return oct; } +EXPORT_SYMBOL_GPL(octeon_allocate_device); /** Register a device's bus location at initialization time. * @param octeon_dev - pointer to the octeon device structure. @@ -804,6 +808,7 @@ int octeon_register_device(struct octeon_device *oct, return refcount; } +EXPORT_SYMBOL_GPL(octeon_register_device); /** Deregister a device at de-initialization time. * @param octeon_dev - pointer to the octeon device structure. @@ -821,6 +826,7 @@ int octeon_deregister_device(struct octeon_device *oct) return refcount; } +EXPORT_SYMBOL_GPL(octeon_deregister_device); int octeon_allocate_ioq_vector(struct octeon_device *oct, u32 num_ioqs) @@ -853,12 +859,14 @@ octeon_allocate_ioq_vector(struct octeon_device *oct, u32 num_ioqs) return 0; } +EXPORT_SYMBOL_GPL(octeon_allocate_ioq_vector); void octeon_free_ioq_vector(struct octeon_device *oct) { vfree(oct->ioq_vector); } +EXPORT_SYMBOL_GPL(octeon_free_ioq_vector); /* this function is only for setting up the first queue */ int octeon_setup_instr_queues(struct octeon_device *oct) @@ -904,6 +912,7 @@ int octeon_setup_instr_queues(struct octeon_device *oct) oct->num_iqs++; return 0; } +EXPORT_SYMBOL_GPL(octeon_setup_instr_queues); int octeon_setup_output_queues(struct octeon_device *oct) { @@ -940,6 +949,7 @@ int octeon_setup_output_queues(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(octeon_setup_output_queues); int octeon_set_io_queues_off(struct octeon_device *oct) { @@ -989,6 +999,7 @@ int octeon_set_io_queues_off(struct octeon_device *oct) } return 0; } +EXPORT_SYMBOL_GPL(octeon_set_io_queues_off); void octeon_set_droq_pkt_op(struct octeon_device *oct, u32 q_no, @@ -1027,6 +1038,7 @@ int octeon_init_dispatch_list(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(octeon_init_dispatch_list); void octeon_delete_dispatch_list(struct octeon_device *oct) { @@ -1058,6 +1070,7 @@ void octeon_delete_dispatch_list(struct octeon_device *oct) kfree(temp); } } +EXPORT_SYMBOL_GPL(octeon_delete_dispatch_list); octeon_dispatch_fn_t octeon_get_dispatch(struct octeon_device *octeon_dev, u16 opcode, @@ -1180,6 +1193,7 @@ octeon_register_dispatch_fn(struct octeon_device *oct, return 0; } +EXPORT_SYMBOL_GPL(octeon_register_dispatch_fn); int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf) { @@ -1262,6 +1276,7 @@ core_drv_init_err: octeon_free_recv_info(recv_info); return 0; } +EXPORT_SYMBOL_GPL(octeon_core_drv_init); int octeon_get_tx_qsize(struct octeon_device *oct, u32 q_no) @@ -1272,6 +1287,7 @@ int octeon_get_tx_qsize(struct octeon_device *oct, u32 q_no) return -1; } +EXPORT_SYMBOL_GPL(octeon_get_tx_qsize); int octeon_get_rx_qsize(struct octeon_device *oct, u32 q_no) { @@ -1280,6 +1296,7 @@ int octeon_get_rx_qsize(struct octeon_device *oct, u32 q_no) return oct->droq[q_no]->max_count; return -1; } +EXPORT_SYMBOL_GPL(octeon_get_rx_qsize); /* Retruns the host firmware handshake OCTEON specific configuration */ struct octeon_config *octeon_get_conf(struct octeon_device *oct) @@ -1302,6 +1319,7 @@ struct octeon_config *octeon_get_conf(struct octeon_device *oct) } return default_oct_conf; } +EXPORT_SYMBOL_GPL(octeon_get_conf); /* scratch register address is same in all the OCT-II and CN70XX models */ #define CNXX_SLI_SCRATCH1 0x3C0 @@ -1318,6 +1336,7 @@ struct octeon_device *lio_get_device(u32 octeon_id) else return octeon_device[octeon_id]; } +EXPORT_SYMBOL_GPL(lio_get_device); u64 lio_pci_readq(struct octeon_device *oct, u64 addr) { @@ -1349,6 +1368,7 @@ u64 lio_pci_readq(struct octeon_device *oct, u64 addr) return val64; } +EXPORT_SYMBOL_GPL(lio_pci_readq); void lio_pci_writeq(struct octeon_device *oct, u64 val, @@ -1369,6 +1389,7 @@ void lio_pci_writeq(struct octeon_device *oct, spin_unlock_irqrestore(&oct->pci_win_lock, flags); } +EXPORT_SYMBOL_GPL(lio_pci_writeq); int octeon_mem_access_ok(struct octeon_device *oct) { @@ -1388,6 +1409,7 @@ int octeon_mem_access_ok(struct octeon_device *oct) return access_okay ? 0 : 1; } +EXPORT_SYMBOL_GPL(octeon_mem_access_ok); int octeon_wait_for_ddr_init(struct octeon_device *oct, u32 *timeout) { @@ -1408,6 +1430,7 @@ int octeon_wait_for_ddr_init(struct octeon_device *oct, u32 *timeout) return ret; } +EXPORT_SYMBOL_GPL(octeon_wait_for_ddr_init); /* Get the octeon id assigned to the octeon device passed as argument. * This function is exported to other modules. @@ -1462,3 +1485,4 @@ void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq) } } } +EXPORT_SYMBOL_GPL(lio_enable_irq); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index d4080bddcb6b..0d6ee30affb9 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -107,6 +107,7 @@ u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq) return last_count; } +EXPORT_SYMBOL_GPL(octeon_droq_check_hw_for_pkts); static void octeon_droq_compute_max_packet_bufs(struct octeon_droq *droq) { @@ -216,6 +217,7 @@ int octeon_delete_droq(struct octeon_device *oct, u32 q_no) return 0; } +EXPORT_SYMBOL_GPL(octeon_delete_droq); int octeon_init_droq(struct octeon_device *oct, u32 q_no, @@ -773,6 +775,7 @@ octeon_droq_process_packets(struct octeon_device *oct, return 0; } +EXPORT_SYMBOL_GPL(octeon_droq_process_packets); /* * Utility function to poll for packets. check_hw_for_packets must be @@ -921,6 +924,7 @@ int octeon_unregister_droq_ops(struct octeon_device *oct, u32 q_no) return 0; } +EXPORT_SYMBOL_GPL(octeon_unregister_droq_ops); int octeon_create_droq(struct octeon_device *oct, u32 q_no, u32 num_descs, diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c index 7ccab36143c1..d70132437af3 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c @@ -164,6 +164,7 @@ octeon_pci_read_core_mem(struct octeon_device *oct, { __octeon_pci_rw_core_mem(oct, coreaddr, buf, len, 1); } +EXPORT_SYMBOL_GPL(octeon_pci_read_core_mem); void octeon_pci_write_core_mem(struct octeon_device *oct, @@ -173,6 +174,7 @@ octeon_pci_write_core_mem(struct octeon_device *oct, { __octeon_pci_rw_core_mem(oct, coreaddr, (u8 *)buf, len, 0); } +EXPORT_SYMBOL_GPL(octeon_pci_write_core_mem); u64 octeon_read_device_mem64(struct octeon_device *oct, u64 coreaddr) { @@ -182,6 +184,7 @@ u64 octeon_read_device_mem64(struct octeon_device *oct, u64 coreaddr) return be64_to_cpu(ret); } +EXPORT_SYMBOL_GPL(octeon_read_device_mem64); u32 octeon_read_device_mem32(struct octeon_device *oct, u64 coreaddr) { @@ -191,6 +194,7 @@ u32 octeon_read_device_mem32(struct octeon_device *oct, u64 coreaddr) return be32_to_cpu(ret); } +EXPORT_SYMBOL_GPL(octeon_read_device_mem32); void octeon_write_device_mem32(struct octeon_device *oct, u64 coreaddr, u32 val) @@ -199,3 +203,4 @@ void octeon_write_device_mem32(struct octeon_device *oct, u64 coreaddr, __octeon_pci_rw_core_mem(oct, coreaddr, (u8 *)&t, 4, 0); } +EXPORT_SYMBOL_GPL(octeon_write_device_mem32); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c index 1a706f81bbb0..dee56ea740e7 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c @@ -79,6 +79,7 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, return sc; } +EXPORT_SYMBOL_GPL(octeon_alloc_soft_command_resp); int octnet_send_nic_data_pkt(struct octeon_device *oct, struct octnic_data_pkt *ndata, @@ -90,6 +91,7 @@ int octnet_send_nic_data_pkt(struct octeon_device *oct, ndata->buf, ndata->datasize, ndata->reqtype); } +EXPORT_SYMBOL_GPL(octnet_send_nic_data_pkt); static inline struct octeon_soft_command *octnic_alloc_ctrl_pkt_sc(struct octeon_device *oct, @@ -196,3 +198,4 @@ octnet_send_nic_ctrl_pkt(struct octeon_device *oct, return retval; } +EXPORT_SYMBOL_GPL(octnet_send_nic_ctrl_pkt); diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index 32f854c0cd79..de8a6ce86ad7 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -185,6 +185,7 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no) } return 1; } +EXPORT_SYMBOL_GPL(octeon_delete_instr_queue); /* Return 0 on success, 1 on failure */ int octeon_setup_iq(struct octeon_device *oct, @@ -258,6 +259,7 @@ int lio_wait_for_instr_fetch(struct octeon_device *oct) return instr_cnt; } +EXPORT_SYMBOL_GPL(lio_wait_for_instr_fetch); static inline void ring_doorbell(struct octeon_device *oct, struct octeon_instr_queue *iq) @@ -282,6 +284,7 @@ octeon_ring_doorbell_locked(struct octeon_device *oct, u32 iq_no) ring_doorbell(oct, iq); spin_unlock(&iq->post_lock); } +EXPORT_SYMBOL_GPL(octeon_ring_doorbell_locked); static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq, u8 *cmd) @@ -345,6 +348,7 @@ octeon_register_reqtype_free_fn(struct octeon_device *oct, int reqtype, return 0; } +EXPORT_SYMBOL_GPL(octeon_register_reqtype_free_fn); static inline void __add_to_request_list(struct octeon_instr_queue *iq, @@ -430,6 +434,7 @@ lio_process_iq_request_list(struct octeon_device *oct, return inst_count; } +EXPORT_SYMBOL_GPL(lio_process_iq_request_list); /* Can only be called from process context */ int @@ -566,6 +571,7 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no, return st.status; } +EXPORT_SYMBOL_GPL(octeon_send_command); void octeon_prepare_soft_command(struct octeon_device *oct, @@ -673,6 +679,7 @@ octeon_prepare_soft_command(struct octeon_device *oct, } } } +EXPORT_SYMBOL_GPL(octeon_prepare_soft_command); int octeon_send_soft_command(struct octeon_device *oct, struct octeon_soft_command *sc) @@ -726,6 +733,7 @@ int octeon_send_soft_command(struct octeon_device *oct, return (octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc, len, REQTYPE_SOFT_COMMAND)); } +EXPORT_SYMBOL_GPL(octeon_send_soft_command); int octeon_setup_sc_buffer_pool(struct octeon_device *oct) { @@ -755,6 +763,7 @@ int octeon_setup_sc_buffer_pool(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(octeon_setup_sc_buffer_pool); int octeon_free_sc_done_list(struct octeon_device *oct) { @@ -794,6 +803,7 @@ int octeon_free_sc_done_list(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(octeon_free_sc_done_list); int octeon_free_sc_zombie_list(struct octeon_device *oct) { @@ -818,6 +828,7 @@ int octeon_free_sc_zombie_list(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(octeon_free_sc_zombie_list); int octeon_free_sc_buffer_pool(struct octeon_device *oct) { @@ -842,6 +853,7 @@ int octeon_free_sc_buffer_pool(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(octeon_free_sc_buffer_pool); struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct, u32 datasize, @@ -913,6 +925,7 @@ struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct, return sc; } +EXPORT_SYMBOL_GPL(octeon_alloc_soft_command); void octeon_free_soft_command(struct octeon_device *oct, struct octeon_soft_command *sc) @@ -925,3 +938,4 @@ void octeon_free_soft_command(struct octeon_device *oct, spin_unlock_bh(&oct->sc_buf_pool.lock); } +EXPORT_SYMBOL_GPL(octeon_free_soft_command); diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c index ac7747ccf56a..861050966e18 100644 --- a/drivers/net/ethernet/cavium/liquidio/response_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c @@ -52,12 +52,14 @@ int octeon_setup_response_list(struct octeon_device *oct) return ret; } +EXPORT_SYMBOL_GPL(octeon_setup_response_list); void octeon_delete_response_list(struct octeon_device *oct) { cancel_delayed_work_sync(&oct->dma_comp_wq.wk.work); destroy_workqueue(oct->dma_comp_wq.wq); } +EXPORT_SYMBOL_GPL(octeon_delete_response_list); int lio_process_ordered_list(struct octeon_device *octeon_dev, u32 force_quit) @@ -219,6 +221,7 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev, return 0; } +EXPORT_SYMBOL_GPL(lio_process_ordered_list); static void oct_poll_req_completion(struct work_struct *work) { diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 7eb2ddbe9bad..a317feb8decb 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1126,8 +1126,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) } poll: - lmac->check_link = alloc_workqueue("check_link", WQ_UNBOUND | - WQ_MEM_RECLAIM, 1); + lmac->check_link = alloc_ordered_workqueue("check_link", WQ_MEM_RECLAIM); if (!lmac->check_link) return -ENOMEM; INIT_DELAYED_WORK(&lmac->dwork, bgx_poll_for_link); diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index efa7f401529e..2e9a74fe0970 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -2184,9 +2184,8 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, len -= offset; rx_frag += nr_frags; - __skb_frag_set_page(rx_frag, sd->pg_chunk.page); - skb_frag_off_set(rx_frag, sd->pg_chunk.offset + offset); - skb_frag_size_set(rx_frag, len); + skb_frag_fill_page_desc(rx_frag, sd->pg_chunk.page, + sd->pg_chunk.offset + offset, len); skb->len += len; skb->data_len += len; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index f0bc7396ce2b..2eb33a727bba 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1175,7 +1175,7 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, txq = netdev_pick_tx(dev, skb, sb_dev); if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) || skb->encapsulation || - cxgb4_is_ktls_skb(skb) || + tls_is_skb_tx_device_offloaded(skb) || (proto != IPPROTO_TCP && proto != IPPROTO_UDP)) txq = txq % pi->nqsets; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 34546f5312ee..a9599ba26975 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -497,11 +497,6 @@ struct cxgb4_uld_info { #endif }; -static inline bool cxgb4_is_ktls_skb(struct sk_buff *skb) -{ - return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk); -} - void cxgb4_uld_enable(struct adapter *adap); void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); int cxgb4_unregister_uld(enum cxgb4_uld type); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 46809e2d94ee..98dd78551d89 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1530,7 +1530,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) #endif /* CHELSIO_IPSEC_INLINE */ #if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) - if (cxgb4_is_ktls_skb(skb) && + if (tls_is_skb_tx_device_offloaded(skb) && (skb->len - skb_tcp_all_headers(skb))) return adap->uld[CXGB4_ULD_KTLS].tx_handler(skb, dev); #endif /* CHELSIO_TLS_DEVICE */ diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 1a5fdd755e9e..bcdc7fc2f427 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1946,7 +1946,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) tls_ctx = tls_get_ctx(skb->sk); tls_netdev = rcu_dereference_bh(tls_ctx->netdev); /* Don't quit on NULL: if tls_device_down is running in parallel, - * netdev might become NULL, even if tls_is_sk_tx_device_offloaded was + * netdev might become NULL, even if tls_is_skb_tx_device_offloaded was * true. Rather continue processing this packet. */ if (unlikely(tls_netdev && tls_netdev != dev)) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h index 41714203ace8..68562a82d036 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h @@ -568,8 +568,7 @@ void chtls_destroy_sock(struct sock *sk); int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); -int chtls_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags); +void chtls_splice_eof(struct socket *sock); int send_tx_flowc_wr(struct sock *sk, int compl, u32 snd_nxt, u32 rcv_nxt); void chtls_tcp_push(struct sock *sk, int flags); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index c2e7037c7ba1..7750702900fa 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1466,7 +1466,7 @@ static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt) tp->write_seq = snd_isn; tp->snd_nxt = snd_isn; tp->snd_una = snd_isn; - inet_sk(sk)->inet_id = get_random_u16(); + atomic_set(&inet_sk(sk)->inet_id, get_random_u16()); assign_rxopt(sk, opt); if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10)) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index ae6b17b96bf1..5fc64e47568a 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -1092,7 +1092,17 @@ new_buf: if (copy > size) copy = size; - if (skb_tailroom(skb) > 0) { + if (msg->msg_flags & MSG_SPLICE_PAGES) { + err = skb_splice_from_iter(skb, &msg->msg_iter, copy, + sk->sk_allocation); + if (err < 0) { + if (err == -EMSGSIZE) + goto new_buf; + goto do_fault; + } + copy = err; + sk_wmem_queued_add(sk, copy); + } else if (skb_tailroom(skb) > 0) { copy = min(copy, skb_tailroom(skb)); if (is_tls_tx(csk)) copy = min_t(int, copy, csk->tlshws.txleft); @@ -1227,113 +1237,13 @@ out_err: goto done; } -int chtls_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) +void chtls_splice_eof(struct socket *sock) { - struct chtls_sock *csk; - struct chtls_dev *cdev; - int mss, err, copied; - struct tcp_sock *tp; - long timeo; + struct sock *sk = sock->sk; - tp = tcp_sk(sk); - copied = 0; - csk = rcu_dereference_sk_user_data(sk); - cdev = csk->cdev; lock_sock(sk); - timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - - err = sk_stream_wait_connect(sk, &timeo); - if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && - err != 0) - goto out_err; - - mss = csk->mss; - csk_set_flag(csk, CSK_TX_MORE_DATA); - - while (size > 0) { - struct sk_buff *skb = skb_peek_tail(&csk->txq); - int copy, i; - - if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || - (copy = mss - skb->len) <= 0) { -new_buf: - if (!csk_mem_free(cdev, sk)) - goto wait_for_sndbuf; - - if (is_tls_tx(csk)) { - skb = get_record_skb(sk, - select_size(sk, size, - flags, - TX_TLSHDR_LEN), - true); - } else { - skb = get_tx_skb(sk, 0); - } - if (!skb) - goto wait_for_memory; - copy = mss; - } - if (copy > size) - copy = size; - - i = skb_shinfo(skb)->nr_frags; - if (skb_can_coalesce(skb, i, page, offset)) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); - } else if (i < MAX_SKB_FRAGS) { - get_page(page); - skb_fill_page_desc(skb, i, page, offset, copy); - } else { - tx_skb_finalize(skb); - push_frames_if_head(sk); - goto new_buf; - } - - skb->len += copy; - if (skb->len == mss) - tx_skb_finalize(skb); - skb->data_len += copy; - skb->truesize += copy; - sk->sk_wmem_queued += copy; - tp->write_seq += copy; - copied += copy; - offset += copy; - size -= copy; - - if (corked(tp, flags) && - (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) - ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; - - if (!size) - break; - - if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)) - push_frames_if_head(sk); - continue; -wait_for_sndbuf: - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -wait_for_memory: - err = csk_wait_memory(cdev, sk, &timeo); - if (err) - goto do_error; - } -out: - csk_reset_flag(csk, CSK_TX_MORE_DATA); - if (copied) - chtls_tcp_push(sk, flags); -done: + chtls_tcp_push(sk, 0); release_sock(sk); - return copied; - -do_error: - if (copied) - goto out; - -out_err: - if (csk_conn_inline(csk)) - csk_reset_flag(csk, CSK_TX_MORE_DATA); - copied = sk_stream_error(sk, flags, err); - goto done; } static void chtls_select_window(struct sock *sk) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c index 1e55b12fee51..455a54708be4 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c @@ -606,7 +606,7 @@ static void __init chtls_init_ulp_ops(void) chtls_cpl_prot.destroy = chtls_destroy_sock; chtls_cpl_prot.shutdown = chtls_shutdown; chtls_cpl_prot.sendmsg = chtls_sendmsg; - chtls_cpl_prot.sendpage = chtls_sendpage; + chtls_cpl_prot.splice_eof = chtls_splice_eof; chtls_cpl_prot.recvmsg = chtls_recvmsg; chtls_cpl_prot.setsockopt = chtls_setsockopt; chtls_cpl_prot.getsockopt = chtls_getsockopt; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 0defd519ba62..0616b5fe241c 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1138,7 +1138,8 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, (lancer_chip(adapter) || BE3_chip(adapter) || skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) { ip = (struct iphdr *)ip_hdr(skb); - pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len)); + if (unlikely(pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len)))) + goto tx_drop; } /* If vlan tag is already inlined in the packet, skip HW VLAN @@ -2343,11 +2344,10 @@ static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb, hdr_len = ETH_HLEN; memcpy(skb->data, start, hdr_len); skb_shinfo(skb)->nr_frags = 1; - skb_frag_set_page(skb, 0, page_info->page); - skb_frag_off_set(&skb_shinfo(skb)->frags[0], - page_info->page_offset + hdr_len); - skb_frag_size_set(&skb_shinfo(skb)->frags[0], - curr_frag_len - hdr_len); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[0], + page_info->page, + page_info->page_offset + hdr_len, + curr_frag_len - hdr_len); skb->data_len = curr_frag_len - hdr_len; skb->truesize += rx_frag_size; skb->tail += hdr_len; @@ -2369,16 +2369,17 @@ static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb, if (page_info->page_offset == 0) { /* Fresh page */ j++; - skb_frag_set_page(skb, j, page_info->page); - skb_frag_off_set(&skb_shinfo(skb)->frags[j], - page_info->page_offset); - skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j], + page_info->page, + page_info->page_offset, + curr_frag_len); skb_shinfo(skb)->nr_frags++; } else { put_page(page_info->page); + skb_frag_size_add(&skb_shinfo(skb)->frags[j], + curr_frag_len); } - skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len); skb->len += curr_frag_len; skb->data_len += curr_frag_len; skb->truesize += rx_frag_size; @@ -2451,14 +2452,16 @@ static void be_rx_compl_process_gro(struct be_rx_obj *rxo, if (i == 0 || page_info->page_offset == 0) { /* First frag or Fresh page */ j++; - skb_frag_set_page(skb, j, page_info->page); - skb_frag_off_set(&skb_shinfo(skb)->frags[j], - page_info->page_offset); - skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j], + page_info->page, + page_info->page_offset, + curr_frag_len); } else { put_page(page_info->page); + skb_frag_size_add(&skb_shinfo(skb)->frags[j], + curr_frag_len); } - skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len); + skb->truesize += rx_frag_size; remaining -= curr_frag_len; memset(page_info, 0, sizeof(*page_info)); diff --git a/drivers/net/ethernet/engleder/tsnep_selftests.c b/drivers/net/ethernet/engleder/tsnep_selftests.c index 1581d6b22232..8a9145f93147 100644 --- a/drivers/net/ethernet/engleder/tsnep_selftests.c +++ b/drivers/net/ethernet/engleder/tsnep_selftests.c @@ -329,7 +329,7 @@ static bool disable_taprio(struct tsnep_adapter *adapter) int retval; memset(&qopt, 0, sizeof(qopt)); - qopt.enable = 0; + qopt.cmd = TAPRIO_CMD_DESTROY; retval = tsnep_tc_setup(adapter->netdev, TC_SETUP_QDISC_TAPRIO, &qopt); if (retval) return false; @@ -360,7 +360,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter) for (i = 0; i < 255; i++) qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); qopt->cycle_time = 1500000; qopt->cycle_time_extension = 0; @@ -382,7 +382,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter) if (!run_taprio(adapter, qopt, 100)) goto failed; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); qopt->cycle_time = 411854; qopt->cycle_time_extension = 0; @@ -406,7 +406,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter) if (!run_taprio(adapter, qopt, 100)) goto failed; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); delay_base_time(adapter, qopt, 12); qopt->cycle_time = 125000; @@ -457,7 +457,7 @@ static bool tsnep_test_taprio_change(struct tsnep_adapter *adapter) for (i = 0; i < 255; i++) qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); qopt->cycle_time = 100000; qopt->cycle_time_extension = 0; @@ -610,7 +610,7 @@ static bool tsnep_test_taprio_extension(struct tsnep_adapter *adapter) for (i = 0; i < 255; i++) qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); qopt->cycle_time = 100000; qopt->cycle_time_extension = 50000; diff --git a/drivers/net/ethernet/engleder/tsnep_tc.c b/drivers/net/ethernet/engleder/tsnep_tc.c index d083e6684f12..745b191a5540 100644 --- a/drivers/net/ethernet/engleder/tsnep_tc.c +++ b/drivers/net/ethernet/engleder/tsnep_tc.c @@ -325,7 +325,7 @@ static int tsnep_taprio(struct tsnep_adapter *adapter, if (!adapter->gate_control) return -EOPNOTSUPP; - if (!qopt->enable) { + if (qopt->cmd == TAPRIO_CMD_DESTROY) { /* disable gate control if active */ mutex_lock(&adapter->gate_control_lock); @@ -337,6 +337,8 @@ static int tsnep_taprio(struct tsnep_adapter *adapter, mutex_unlock(&adapter->gate_control_lock); return 0; + } else if (qopt->cmd != TAPRIO_CMD_REPLACE) { + return -EOPNOTSUPP; } retval = tsnep_validate_gcl(qopt); diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index 1c78f66a89da..75401d2a5fb4 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -29,7 +29,7 @@ config FEC select CRC32 select PHYLIB select PAGE_POOL - select PAGE_POOL_STATS + imply PAGE_POOL_STATS imply NET_SELFTESTS help Say Y here if you want to use the built-in 10/100 Fast ethernet diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index a62cffaf6ff1..a9676d0dece8 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -5025,7 +5025,7 @@ err_wq_alloc: return err; } -static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) +static void dpaa2_eth_remove(struct fsl_mc_device *ls_dev) { struct device *dev; struct net_device *net_dev; @@ -5073,8 +5073,6 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) dev_dbg(net_dev->dev.parent, "Removed interface %s\n", net_dev->name); free_netdev(net_dev); - - return 0; } static const struct fsl_mc_device_id dpaa2_eth_match_id_table[] = { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 00e50bd30189..a69bb22c37ea 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -252,8 +252,8 @@ static int dpaa2_pcs_create(struct dpaa2_mac *mac, struct fwnode_handle *dpmac_node, int id) { - struct mdio_device *mdiodev; struct fwnode_handle *node; + struct phylink_pcs *pcs; node = fwnode_find_reference(dpmac_node, "pcs-handle", 0); if (IS_ERR(node)) { @@ -262,26 +262,27 @@ static int dpaa2_pcs_create(struct dpaa2_mac *mac, return 0; } - if (!fwnode_device_is_available(node)) { - netdev_err(mac->net_dev, "pcs-handle node not available\n"); - fwnode_handle_put(node); - return -ENODEV; - } - - mdiodev = fwnode_mdio_find_device(node); + pcs = lynx_pcs_create_fwnode(node); fwnode_handle_put(node); - if (!mdiodev) { + + if (pcs == ERR_PTR(-EPROBE_DEFER)) { netdev_dbg(mac->net_dev, "missing PCS device\n"); return -EPROBE_DEFER; } - mac->pcs = lynx_pcs_create(mdiodev); - if (!mac->pcs) { - netdev_err(mac->net_dev, "lynx_pcs_create() failed\n"); - put_device(&mdiodev->dev); - return -ENOMEM; + if (pcs == ERR_PTR(-ENODEV)) { + netdev_err(mac->net_dev, "pcs-handle node not available\n"); + return PTR_ERR(pcs); + } + + if (IS_ERR(pcs)) { + netdev_err(mac->net_dev, + "lynx_pcs_create_fwnode() failed: %pe\n", pcs); + return PTR_ERR(pcs); } + mac->pcs = pcs; + return 0; } @@ -290,11 +291,7 @@ static void dpaa2_pcs_destroy(struct dpaa2_mac *mac) struct phylink_pcs *phylink_pcs = mac->pcs; if (phylink_pcs) { - struct mdio_device *mdio = lynx_get_mdio_device(phylink_pcs); - struct device *dev = &mdio->dev; - lynx_pcs_destroy(phylink_pcs); - put_device(dev); mac->pcs = NULL; } } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c index 90d23ab1ce9d..4497e3c0456d 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c @@ -219,7 +219,7 @@ err_exit: return err; } -static int dpaa2_ptp_remove(struct fsl_mc_device *mc_dev) +static void dpaa2_ptp_remove(struct fsl_mc_device *mc_dev) { struct device *dev = &mc_dev->dev; struct ptp_qoriq *ptp_qoriq; @@ -232,8 +232,6 @@ static int dpaa2_ptp_remove(struct fsl_mc_device *mc_dev) fsl_mc_free_irqs(mc_dev); dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle); fsl_mc_portal_free(mc_dev->mc_io); - - return 0; } static const struct fsl_mc_device_id dpaa2_ptp_match_id_table[] = { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index f4ae4289c41a..21cc4e52425a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -3221,7 +3221,7 @@ static void dpaa2_switch_teardown(struct fsl_mc_device *sw_dev) dev_warn(dev, "dpsw_close err %d\n", err); } -static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) +static void dpaa2_switch_remove(struct fsl_mc_device *sw_dev) { struct ethsw_port_priv *port_priv; struct ethsw_core *ethsw; @@ -3252,8 +3252,6 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) kfree(ethsw); dev_set_drvdata(dev, NULL); - - return 0; } static int dpaa2_switch_probe_port(struct ethsw_core *ethsw, diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 9e1b2536e9a9..35461165de0d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1451,9 +1451,8 @@ static void enetc_add_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i, xdp_buff_set_frag_pfmemalloc(xdp_buff); frag = &shinfo->frags[shinfo->nr_frags]; - skb_frag_off_set(frag, rx_swbd->page_offset); - skb_frag_size_set(frag, size); - __skb_frag_set_page(frag, rx_swbd->page); + skb_frag_fill_page_desc(frag, rx_swbd->page, rx_swbd->page_offset, + size); shinfo->nr_frags++; } @@ -1790,7 +1789,7 @@ static int enetc_alloc_tx_resource(struct enetc_bdr_resource *res, res->bd_count = bd_count; res->bd_size = sizeof(union enetc_tx_bd); - res->tx_swbd = vzalloc(bd_count * sizeof(*res->tx_swbd)); + res->tx_swbd = vcalloc(bd_count, sizeof(*res->tx_swbd)); if (!res->tx_swbd) return -ENOMEM; @@ -1878,7 +1877,7 @@ static int enetc_alloc_rx_resource(struct enetc_bdr_resource *res, if (extended) res->bd_size *= 2; - res->rx_swbd = vzalloc(bd_count * sizeof(struct enetc_rx_swbd)); + res->rx_swbd = vcalloc(bd_count, sizeof(struct enetc_rx_swbd)); if (!res->rx_swbd) return -ENOMEM; @@ -2639,7 +2638,7 @@ static void enetc_debug_tx_ring_prios(struct enetc_ndev_priv *priv) priv->tx_ring[i]->prio); } -static void enetc_reset_tc_mqprio(struct net_device *ndev) +void enetc_reset_tc_mqprio(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_hw *hw = &priv->si->hw; @@ -2664,6 +2663,7 @@ static void enetc_reset_tc_mqprio(struct net_device *ndev) enetc_change_preemptible_tcs(priv, 0); } +EXPORT_SYMBOL_GPL(enetc_reset_tc_mqprio); int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) { diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index c97a8e3d7a7f..8577cf7699a0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -429,6 +429,7 @@ struct net_device_stats *enetc_get_stats(struct net_device *ndev); void enetc_set_features(struct net_device *ndev, netdev_features_t features); int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd); int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data); +void enetc_reset_tc_mqprio(struct net_device *ndev); int enetc_setup_bpf(struct net_device *ndev, struct netdev_bpf *bpf); int enetc_xdp_xmit(struct net_device *ndev, int num_frames, struct xdp_frame **frames, u32 flags); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 7cd22d370caa..c153dc083aff 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -863,7 +863,6 @@ static int enetc_imdio_create(struct enetc_pf *pf) struct device *dev = &pf->si->pdev->dev; struct enetc_mdio_priv *mdio_priv; struct phylink_pcs *phylink_pcs; - struct mdio_device *mdio_device; struct mii_bus *bus; int err; @@ -889,17 +888,9 @@ static int enetc_imdio_create(struct enetc_pf *pf) goto free_mdio_bus; } - mdio_device = mdio_device_create(bus, 0); - if (IS_ERR(mdio_device)) { - err = PTR_ERR(mdio_device); - dev_err(dev, "cannot create mdio device (%d)\n", err); - goto unregister_mdiobus; - } - - phylink_pcs = lynx_pcs_create(mdio_device); - if (!phylink_pcs) { - mdio_device_free(mdio_device); - err = -ENOMEM; + phylink_pcs = lynx_pcs_create_mdiodev(bus, 0); + if (IS_ERR(phylink_pcs)) { + err = PTR_ERR(phylink_pcs); dev_err(dev, "cannot create lynx pcs (%d)\n", err); goto unregister_mdiobus; } @@ -918,13 +909,8 @@ free_mdio_bus: static void enetc_imdio_remove(struct enetc_pf *pf) { - struct mdio_device *mdio_device; - - if (pf->pcs) { - mdio_device = lynx_get_mdio_device(pf->pcs); - mdio_device_free(mdio_device); + if (pf->pcs) lynx_pcs_destroy(pf->pcs); - } if (pf->imdio) { mdiobus_unregister(pf->imdio); mdiobus_free(pf->imdio); @@ -1200,14 +1186,9 @@ static int enetc_init_port_rss_memory(struct enetc_si *si) static int enetc_pf_register_with_ierb(struct pci_dev *pdev) { - struct device_node *node = pdev->dev.of_node; struct platform_device *ierb_pdev; struct device_node *ierb_node; - /* Don't register with the IERB if the PF itself is disabled */ - if (!node || !of_device_is_available(node)) - return 0; - ierb_node = of_find_compatible_node(NULL, NULL, "fsl,ls1028a-enetc-ierb"); if (!ierb_node || !of_device_is_available(ierb_node)) @@ -1222,56 +1203,81 @@ static int enetc_pf_register_with_ierb(struct pci_dev *pdev) return enetc_ierb_register_pf(ierb_pdev, pdev); } -static int enetc_pf_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) +static struct enetc_si *enetc_psi_create(struct pci_dev *pdev) { - struct device_node *node = pdev->dev.of_node; - struct enetc_ndev_priv *priv; - struct net_device *ndev; struct enetc_si *si; - struct enetc_pf *pf; int err; - err = enetc_pf_register_with_ierb(pdev); - if (err == -EPROBE_DEFER) - return err; - if (err) - dev_warn(&pdev->dev, - "Could not register with IERB driver: %pe, please update the device tree\n", - ERR_PTR(err)); - - err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf)); - if (err) - return dev_err_probe(&pdev->dev, err, "PCI probing failed\n"); + err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(struct enetc_pf)); + if (err) { + dev_err_probe(&pdev->dev, err, "PCI probing failed\n"); + goto out; + } si = pci_get_drvdata(pdev); if (!si->hw.port || !si->hw.global) { err = -ENODEV; dev_err(&pdev->dev, "could not map PF space, probing a VF?\n"); - goto err_map_pf_space; + goto out_pci_remove; } err = enetc_setup_cbdr(&pdev->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE, &si->cbd_ring); if (err) - goto err_setup_cbdr; + goto out_pci_remove; err = enetc_init_port_rfs_memory(si); if (err) { dev_err(&pdev->dev, "Failed to initialize RFS memory\n"); - goto err_init_port_rfs; + goto out_teardown_cbdr; } err = enetc_init_port_rss_memory(si); if (err) { dev_err(&pdev->dev, "Failed to initialize RSS memory\n"); - goto err_init_port_rss; + goto out_teardown_cbdr; } - if (node && !of_device_is_available(node)) { - dev_info(&pdev->dev, "device is disabled, skipping\n"); - err = -ENODEV; - goto err_device_disabled; + return si; + +out_teardown_cbdr: + enetc_teardown_cbdr(&si->cbd_ring); +out_pci_remove: + enetc_pci_remove(pdev); +out: + return ERR_PTR(err); +} + +static void enetc_psi_destroy(struct pci_dev *pdev) +{ + struct enetc_si *si = pci_get_drvdata(pdev); + + enetc_teardown_cbdr(&si->cbd_ring); + enetc_pci_remove(pdev); +} + +static int enetc_pf_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct device_node *node = pdev->dev.of_node; + struct enetc_ndev_priv *priv; + struct net_device *ndev; + struct enetc_si *si; + struct enetc_pf *pf; + int err; + + err = enetc_pf_register_with_ierb(pdev); + if (err == -EPROBE_DEFER) + return err; + if (err) + dev_warn(&pdev->dev, + "Could not register with IERB driver: %pe, please update the device tree\n", + ERR_PTR(err)); + + si = enetc_psi_create(pdev); + if (IS_ERR(si)) { + err = PTR_ERR(si); + goto err_psi_create; } pf = enetc_si_priv(si); @@ -1353,15 +1359,9 @@ err_alloc_si_res: si->ndev = NULL; free_netdev(ndev); err_alloc_netdev: -err_init_port_rss: -err_init_port_rfs: -err_device_disabled: err_setup_mac_addresses: - enetc_teardown_cbdr(&si->cbd_ring); -err_setup_cbdr: -err_map_pf_space: - enetc_pci_remove(pdev); - + enetc_psi_destroy(pdev); +err_psi_create: return err; } @@ -1384,12 +1384,29 @@ static void enetc_pf_remove(struct pci_dev *pdev) enetc_free_msix(priv); enetc_free_si_resources(priv); - enetc_teardown_cbdr(&si->cbd_ring); free_netdev(si->ndev); - enetc_pci_remove(pdev); + enetc_psi_destroy(pdev); +} + +static void enetc_fixup_clear_rss_rfs(struct pci_dev *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct enetc_si *si; + + /* Only apply quirk for disabled functions. For the ones + * that are enabled, enetc_pf_probe() will apply it. + */ + if (node && of_device_is_available(node)) + return; + + si = enetc_psi_create(pdev); + if (!IS_ERR(si)) + enetc_psi_destroy(pdev); } +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF, + enetc_fixup_clear_rss_rfs); static const struct pci_device_id enetc_pf_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF) }, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c index 17c097cef7d4..5243fc031058 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c @@ -8,7 +8,7 @@ #include "enetc.h" int enetc_phc_index = -1; -EXPORT_SYMBOL(enetc_phc_index); +EXPORT_SYMBOL_GPL(enetc_phc_index); static struct ptp_clock_info enetc_ptp_caps = { .owner = THIS_MODULE, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 126007ab70f6..270cbd5e8684 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -43,10 +43,9 @@ void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed) enetc_port_wr(hw, ENETC_PMR, (tmp & ~ENETC_PMR_PSPEED_MASK) | pspeed); } -static int enetc_setup_taprio(struct net_device *ndev, +static int enetc_setup_taprio(struct enetc_ndev_priv *priv, struct tc_taprio_qopt_offload *admin_conf) { - struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_hw *hw = &priv->si->hw; struct enetc_cbd cbd = {.cmd = 0}; struct tgs_gcl_conf *gcl_config; @@ -60,19 +59,13 @@ static int enetc_setup_taprio(struct net_device *ndev, int err; int i; + /* TSD and Qbv are mutually exclusive in hardware */ + for (i = 0; i < priv->num_tx_rings; i++) + if (priv->tx_ring[i]->tsd_enable) + return -EBUSY; + if (admin_conf->num_entries > enetc_get_max_gcl_len(hw)) return -EINVAL; - gcl_len = admin_conf->num_entries; - - tge = enetc_rd(hw, ENETC_PTGCR); - if (!admin_conf->enable) { - enetc_wr(hw, ENETC_PTGCR, tge & ~ENETC_PTGCR_TGE); - enetc_reset_ptcmsdur(hw); - - priv->active_offloads &= ~ENETC_F_QBV; - - return 0; - } if (admin_conf->cycle_time > U32_MAX || admin_conf->cycle_time_extension > U32_MAX) @@ -82,6 +75,7 @@ static int enetc_setup_taprio(struct net_device *ndev, * control BD descriptor. */ gcl_config = &cbd.gcl_conf; + gcl_len = admin_conf->num_entries; data_size = struct_size(gcl_data, entry, gcl_len); tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, @@ -115,6 +109,7 @@ static int enetc_setup_taprio(struct net_device *ndev, cbd.cls = BDCR_CMD_PORT_GCL; cbd.status_flags = 0; + tge = enetc_rd(hw, ENETC_PTGCR); enetc_wr(hw, ENETC_PTGCR, tge | ENETC_PTGCR_TGE); err = enetc_send_cmd(priv->si, &cbd); @@ -132,25 +127,95 @@ static int enetc_setup_taprio(struct net_device *ndev, return 0; } -int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) +static void enetc_reset_taprio_stats(struct enetc_ndev_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_tx_rings; i++) + priv->tx_ring[i]->stats.win_drop = 0; +} + +static void enetc_reset_taprio(struct enetc_ndev_priv *priv) +{ + struct enetc_hw *hw = &priv->si->hw; + u32 val; + + val = enetc_rd(hw, ENETC_PTGCR); + enetc_wr(hw, ENETC_PTGCR, val & ~ENETC_PTGCR_TGE); + enetc_reset_ptcmsdur(hw); + + priv->active_offloads &= ~ENETC_F_QBV; +} + +static void enetc_taprio_destroy(struct net_device *ndev) { - struct tc_taprio_qopt_offload *taprio = type_data; struct enetc_ndev_priv *priv = netdev_priv(ndev); - int err, i; - /* TSD and Qbv are mutually exclusive in hardware */ + enetc_reset_taprio(priv); + enetc_reset_tc_mqprio(ndev); + enetc_reset_taprio_stats(priv); +} + +static void enetc_taprio_stats(struct net_device *ndev, + struct tc_taprio_qopt_stats *stats) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + u64 window_drops = 0; + int i; + for (i = 0; i < priv->num_tx_rings; i++) - if (priv->tx_ring[i]->tsd_enable) - return -EBUSY; + window_drops += priv->tx_ring[i]->stats.win_drop; + + stats->window_drops = window_drops; +} + +static void enetc_taprio_queue_stats(struct net_device *ndev, + struct tc_taprio_qopt_queue_stats *queue_stats) +{ + struct tc_taprio_qopt_stats *stats = &queue_stats->stats; + struct enetc_ndev_priv *priv = netdev_priv(ndev); + int queue = queue_stats->queue; - err = enetc_setup_tc_mqprio(ndev, &taprio->mqprio); + stats->window_drops = priv->tx_ring[queue]->stats.win_drop; +} + +static int enetc_taprio_replace(struct net_device *ndev, + struct tc_taprio_qopt_offload *offload) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + int err; + + err = enetc_setup_tc_mqprio(ndev, &offload->mqprio); if (err) return err; - err = enetc_setup_taprio(ndev, taprio); - if (err) { - taprio->mqprio.qopt.num_tc = 0; - enetc_setup_tc_mqprio(ndev, &taprio->mqprio); + err = enetc_setup_taprio(priv, offload); + if (err) + enetc_reset_tc_mqprio(ndev); + + return err; +} + +int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) +{ + struct tc_taprio_qopt_offload *offload = type_data; + int err = 0; + + switch (offload->cmd) { + case TAPRIO_CMD_REPLACE: + err = enetc_taprio_replace(ndev, offload); + break; + case TAPRIO_CMD_DESTROY: + enetc_taprio_destroy(ndev); + break; + case TAPRIO_CMD_STATS: + enetc_taprio_stats(ndev, &offload->stats); + break; + case TAPRIO_CMD_QUEUE_STATS: + enetc_taprio_queue_stats(ndev, &offload->queue_stats); + break; + default: + err = -EOPNOTSUPP; } return err; diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 9939ccafb556..63a053dea819 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -355,7 +355,7 @@ struct bufdesc_ex { #define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES) #define FEC_ENET_TX_FRSIZE 2048 #define FEC_ENET_TX_FRPPG (PAGE_SIZE / FEC_ENET_TX_FRSIZE) -#define TX_RING_SIZE 512 /* Must be power of two */ +#define TX_RING_SIZE 1024 /* Must be power of two */ #define TX_RING_MOD_MASK 511 /* for this to work */ #define BD_ENET_RX_INT 0x00800000 @@ -544,10 +544,23 @@ enum { XDP_STATS_TOTAL, }; +enum fec_txbuf_type { + FEC_TXBUF_T_SKB, + FEC_TXBUF_T_XDP_NDO, +}; + +struct fec_tx_buffer { + union { + struct sk_buff *skb; + struct xdp_frame *xdp; + }; + enum fec_txbuf_type type; +}; + struct fec_enet_priv_tx_q { struct bufdesc_prop bd; unsigned char *tx_bounce[TX_RING_SIZE]; - struct sk_buff *tx_skbuff[TX_RING_SIZE]; + struct fec_tx_buffer tx_buf[TX_RING_SIZE]; unsigned short tx_stop_threshold; unsigned short tx_wake_threshold; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 38e5b5abe067..66b5cbdb43b9 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -397,7 +397,7 @@ static void fec_dump(struct net_device *ndev) fec16_to_cpu(bdp->cbd_sc), fec32_to_cpu(bdp->cbd_bufaddr), fec16_to_cpu(bdp->cbd_datlen), - txq->tx_skbuff[index]); + txq->tx_buf[index].skb); bdp = fec_enet_get_nextdesc(bdp, &txq->bd); index++; } while (bdp != txq->bd.base); @@ -654,7 +654,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, index = fec_enet_get_bd_index(last_bdp, &txq->bd); /* Save skb pointer */ - txq->tx_skbuff[index] = skb; + txq->tx_buf[index].skb = skb; /* Make sure the updates to rest of the descriptor are performed before * transferring ownership. @@ -672,9 +672,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, skb_tx_timestamp(skb); - /* Make sure the update to bdp and tx_skbuff are performed before - * txq->bd.cur. - */ + /* Make sure the update to bdp is performed before txq->bd.cur. */ wmb(); txq->bd.cur = bdp; @@ -862,7 +860,7 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq, } /* Save skb pointer */ - txq->tx_skbuff[index] = skb; + txq->tx_buf[index].skb = skb; skb_tx_timestamp(skb); txq->bd.cur = bdp; @@ -952,16 +950,33 @@ static void fec_enet_bd_init(struct net_device *dev) for (i = 0; i < txq->bd.ring_size; i++) { /* Initialize the BD for every fragment in the page. */ bdp->cbd_sc = cpu_to_fec16(0); - if (bdp->cbd_bufaddr && - !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) - dma_unmap_single(&fep->pdev->dev, - fec32_to_cpu(bdp->cbd_bufaddr), - fec16_to_cpu(bdp->cbd_datlen), - DMA_TO_DEVICE); - if (txq->tx_skbuff[i]) { - dev_kfree_skb_any(txq->tx_skbuff[i]); - txq->tx_skbuff[i] = NULL; + if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) { + if (bdp->cbd_bufaddr && + !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); + if (txq->tx_buf[i].skb) { + dev_kfree_skb_any(txq->tx_buf[i].skb); + txq->tx_buf[i].skb = NULL; + } + } else { + if (bdp->cbd_bufaddr) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); + + if (txq->tx_buf[i].xdp) { + xdp_return_frame(txq->tx_buf[i].xdp); + txq->tx_buf[i].xdp = NULL; + } + + /* restore default tx buffer type: FEC_TXBUF_T_SKB */ + txq->tx_buf[i].type = FEC_TXBUF_T_SKB; } + bdp->cbd_bufaddr = cpu_to_fec32(0); bdp = fec_enet_get_nextdesc(bdp, &txq->bd); } @@ -1011,24 +1026,6 @@ static void fec_enet_enable_ring(struct net_device *ndev) } } -static void fec_enet_reset_skb(struct net_device *ndev) -{ - struct fec_enet_private *fep = netdev_priv(ndev); - struct fec_enet_priv_tx_q *txq; - int i, j; - - for (i = 0; i < fep->num_tx_queues; i++) { - txq = fep->tx_queue[i]; - - for (j = 0; j < txq->bd.ring_size; j++) { - if (txq->tx_skbuff[j]) { - dev_kfree_skb_any(txq->tx_skbuff[j]); - txq->tx_skbuff[j] = NULL; - } - } - } -} - /* * This function is called to start or restart the FEC during a link * change, transmit timeout, or to reconfigure the FEC. The network @@ -1071,9 +1068,6 @@ fec_restart(struct net_device *ndev) fec_enet_enable_ring(ndev); - /* Reset tx SKB buffers. */ - fec_enet_reset_skb(ndev); - /* Enable MII mode */ if (fep->full_duplex == DUPLEX_FULL) { /* FD enable */ @@ -1378,9 +1372,10 @@ fec_enet_hwtstamp(struct fec_enet_private *fep, unsigned ts, } static void -fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) +fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget) { struct fec_enet_private *fep; + struct xdp_frame *xdpf; struct bufdesc *bdp; unsigned short status; struct sk_buff *skb; @@ -1408,16 +1403,39 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) index = fec_enet_get_bd_index(bdp, &txq->bd); - skb = txq->tx_skbuff[index]; - txq->tx_skbuff[index] = NULL; - if (!IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) - dma_unmap_single(&fep->pdev->dev, - fec32_to_cpu(bdp->cbd_bufaddr), - fec16_to_cpu(bdp->cbd_datlen), - DMA_TO_DEVICE); - bdp->cbd_bufaddr = cpu_to_fec32(0); - if (!skb) - goto skb_done; + if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) { + skb = txq->tx_buf[index].skb; + txq->tx_buf[index].skb = NULL; + if (bdp->cbd_bufaddr && + !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); + bdp->cbd_bufaddr = cpu_to_fec32(0); + if (!skb) + goto tx_buf_done; + } else { + /* Tx processing cannot call any XDP (or page pool) APIs if + * the "budget" is 0. Because NAPI is called with budget of + * 0 (such as netpoll) indicates we may be in an IRQ context, + * however, we can't use the page pool from IRQ context. + */ + if (unlikely(!budget)) + break; + + xdpf = txq->tx_buf[index].xdp; + if (bdp->cbd_bufaddr) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); + bdp->cbd_bufaddr = cpu_to_fec32(0); + if (!xdpf) { + txq->tx_buf[index].type = FEC_TXBUF_T_SKB; + goto tx_buf_done; + } + } /* Check for errors. */ if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC | @@ -1436,21 +1454,11 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) ndev->stats.tx_carrier_errors++; } else { ndev->stats.tx_packets++; - ndev->stats.tx_bytes += skb->len; - } - /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who - * are to time stamp the packet, so we still need to check time - * stamping enabled flag. - */ - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && - fep->hwts_tx_en) && - fep->bufdesc_ex) { - struct skb_shared_hwtstamps shhwtstamps; - struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; - - fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps); - skb_tstamp_tx(skb, &shhwtstamps); + if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) + ndev->stats.tx_bytes += skb->len; + else + ndev->stats.tx_bytes += xdpf->len; } /* Deferred means some collisions occurred during transmit, @@ -1459,10 +1467,32 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) if (status & BD_ENET_TX_DEF) ndev->stats.collisions++; - /* Free the sk buffer associated with this last transmit */ - dev_kfree_skb_any(skb); -skb_done: - /* Make sure the update to bdp and tx_skbuff are performed + if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) { + /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who + * are to time stamp the packet, so we still need to check time + * stamping enabled flag. + */ + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && + fep->hwts_tx_en) && fep->bufdesc_ex) { + struct skb_shared_hwtstamps shhwtstamps; + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + + fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps); + skb_tstamp_tx(skb, &shhwtstamps); + } + + /* Free the sk buffer associated with this last transmit */ + dev_kfree_skb_any(skb); + } else { + xdp_return_frame(xdpf); + + txq->tx_buf[index].xdp = NULL; + /* restore default tx buffer type: FEC_TXBUF_T_SKB */ + txq->tx_buf[index].type = FEC_TXBUF_T_SKB; + } + +tx_buf_done: + /* Make sure the update to bdp and tx_buf are performed * before dirty_tx */ wmb(); @@ -1486,14 +1516,14 @@ skb_done: writel(0, txq->bd.reg_desc_active); } -static void fec_enet_tx(struct net_device *ndev) +static void fec_enet_tx(struct net_device *ndev, int budget) { struct fec_enet_private *fep = netdev_priv(ndev); int i; /* Make sure that AVB queues are processed first. */ for (i = fep->num_tx_queues - 1; i >= 0; i--) - fec_enet_tx_queue(ndev, i); + fec_enet_tx_queue(ndev, i, budget); } static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq, @@ -1836,7 +1866,7 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget) do { done += fec_enet_rx(ndev, budget - done); - fec_enet_tx(ndev); + fec_enet_tx(ndev, budget); } while ((done < budget) && fec_enet_collect_events(fep)); if (done < budget) { @@ -2810,6 +2840,7 @@ static void fec_enet_get_xdp_stats(struct fec_enet_private *fep, u64 *data) static void fec_enet_page_pool_stats(struct fec_enet_private *fep, u64 *data) { +#ifdef CONFIG_PAGE_POOL_STATS struct page_pool_stats stats = {}; struct fec_enet_priv_rx_q *rxq; int i; @@ -2824,6 +2855,7 @@ static void fec_enet_page_pool_stats(struct fec_enet_private *fep, u64 *data) } page_pool_ethtool_stats_get(data, &stats); +#endif } static void fec_enet_get_ethtool_stats(struct net_device *dev, @@ -3268,9 +3300,19 @@ static void fec_enet_free_buffers(struct net_device *ndev) for (i = 0; i < txq->bd.ring_size; i++) { kfree(txq->tx_bounce[i]); txq->tx_bounce[i] = NULL; - skb = txq->tx_skbuff[i]; - txq->tx_skbuff[i] = NULL; - dev_kfree_skb(skb); + + if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) { + skb = txq->tx_buf[i].skb; + txq->tx_buf[i].skb = NULL; + dev_kfree_skb(skb); + } else { + if (txq->tx_buf[i].xdp) { + xdp_return_frame(txq->tx_buf[i].xdp); + txq->tx_buf[i].xdp = NULL; + } + + txq->tx_buf[i].type = FEC_TXBUF_T_SKB; + } } } } @@ -3315,8 +3357,7 @@ static int fec_enet_alloc_queue(struct net_device *ndev) fep->total_tx_ring_size += fep->tx_queue[i]->bd.ring_size; txq->tx_stop_threshold = FEC_MAX_SKB_DESCS; - txq->tx_wake_threshold = - (txq->bd.ring_size - txq->tx_stop_threshold) / 2; + txq->tx_wake_threshold = FEC_MAX_SKB_DESCS + 2 * MAX_SKB_FRAGS; txq->tso_hdrs = dma_alloc_coherent(&fep->pdev->dev, txq->bd.ring_size * TSO_HEADER_SIZE, @@ -3751,12 +3792,18 @@ static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf) if (fep->quirks & FEC_QUIRK_SWAP_FRAME) return -EOPNOTSUPP; + if (!bpf->prog) + xdp_features_clear_redirect_target(dev); + if (is_run) { napi_disable(&fep->napi); netif_tx_disable(dev); } old_prog = xchg(&fep->xdp_prog, bpf->prog); + if (old_prog) + bpf_prog_put(old_prog); + fec_restart(dev); if (is_run) { @@ -3764,8 +3811,8 @@ static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf) netif_tx_start_all_queues(dev); } - if (old_prog) - bpf_prog_put(old_prog); + if (bpf->prog) + xdp_features_set_redirect_target(dev, false); return 0; @@ -3791,19 +3838,18 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, struct xdp_frame *frame) { unsigned int index, status, estatus; - struct bufdesc *bdp, *last_bdp; + struct bufdesc *bdp; dma_addr_t dma_addr; int entries_free; entries_free = fec_enet_get_free_txdesc_num(txq); if (entries_free < MAX_SKB_FRAGS + 1) { - netdev_err(fep->netdev, "NOT enough BD for SG!\n"); - return NETDEV_TX_BUSY; + netdev_err_once(fep->netdev, "NOT enough BD for SG!\n"); + return -EBUSY; } /* Fill in a Tx ring entry */ bdp = txq->bd.cur; - last_bdp = bdp; status = fec16_to_cpu(bdp->cbd_sc); status &= ~BD_ENET_TX_STATS; @@ -3812,7 +3858,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, dma_addr = dma_map_single(&fep->pdev->dev, frame->data, frame->len, DMA_TO_DEVICE); if (dma_mapping_error(&fep->pdev->dev, dma_addr)) - return FEC_ENET_XDP_CONSUMED; + return -ENOMEM; status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST); if (fep->bufdesc_ex) @@ -3831,8 +3877,8 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, ebdp->cbd_esc = cpu_to_fec32(estatus); } - index = fec_enet_get_bd_index(last_bdp, &txq->bd); - txq->tx_skbuff[index] = NULL; + txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO; + txq->tx_buf[index].xdp = frame; /* Make sure the updates to rest of the descriptor are performed before * transferring ownership. @@ -3846,7 +3892,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, bdp->cbd_sc = cpu_to_fec16(status); /* If this was the last BD in the ring, start at the beginning again. */ - bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); /* Make sure the update to bdp are performed before txq->bd.cur. */ dma_wmb(); @@ -3878,8 +3924,10 @@ static int fec_enet_xdp_xmit(struct net_device *dev, __netif_tx_lock(nq, cpu); + /* Avoid tx timeout as XDP shares the queue with kernel stack */ + txq_trans_cond_update(nq); for (i = 0; i < num_frames; i++) { - if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) != 0) + if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) < 0) break; sent_frames++; } @@ -4035,6 +4083,10 @@ static int fec_enet_init(struct net_device *ndev) ndev->hw_features = ndev->features; + if (!(fep->quirks & FEC_QUIRK_SWAP_FRAME)) + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT; + fec_restart(ndev); if (fep->quirks & FEC_QUIRK_MIB_CLEAR) diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c index d528ca681b6f..3088da7adf0f 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c @@ -763,15 +763,15 @@ static void dtsec_pcs_get_state(struct phylink_pcs *pcs, phylink_mii_c22_pcs_get_state(dtsec->tbidev, state); } -static int dtsec_pcs_config(struct phylink_pcs *pcs, unsigned int mode, +static int dtsec_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) { struct fman_mac *dtsec = pcs_to_dtsec(pcs); - return phylink_mii_c22_pcs_config(dtsec->tbidev, mode, interface, - advertising); + return phylink_mii_c22_pcs_config(dtsec->tbidev, interface, + advertising, neg_mode); } static void dtsec_pcs_an_restart(struct phylink_pcs *pcs) @@ -1447,6 +1447,7 @@ int dtsec_initialization(struct mac_device *mac_dev, goto _return_fm_mac_free; } dtsec->pcs.ops = &dtsec_pcs_ops; + dtsec->pcs.neg_mode = true; dtsec->pcs.poll = true; supported = mac_dev->phylink_config.supported_interfaces; diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 625c79d5636f..3b75cc543be9 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -976,14 +976,10 @@ static int memac_init(struct fman_mac *memac) static void pcs_put(struct phylink_pcs *pcs) { - struct mdio_device *mdiodev; - if (IS_ERR_OR_NULL(pcs)) return; - mdiodev = lynx_get_mdio_device(pcs); lynx_pcs_destroy(pcs); - mdio_device_free(mdiodev); } static int memac_free(struct fman_mac *memac) @@ -1043,20 +1039,14 @@ static struct phylink_pcs *memac_pcs_create(struct device_node *mac_node, int index) { struct device_node *node; - struct mdio_device *mdiodev = NULL; struct phylink_pcs *pcs; node = of_parse_phandle(mac_node, "pcsphy-handle", index); - if (node && of_device_is_available(node)) - mdiodev = of_mdio_find_device(node); - of_node_put(node); + if (!node) + return ERR_PTR(-ENODEV); - if (!mdiodev) - return ERR_PTR(-EPROBE_DEFER); - - pcs = lynx_pcs_create(mdiodev); - if (!pcs) - mdio_device_free(mdiodev); + pcs = lynx_pcs_create_fwnode(of_fwnode_handle(node)); + of_node_put(node); return pcs; } diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c index d37d7a19a759..59a8f0bd0f5c 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c @@ -127,7 +127,7 @@ static int fs_enet_mdio_probe(struct platform_device *ofdev) if (ret) goto out_res; - snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", res.start); + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%pap", &res.start); fec->fecp = ioremap(res.start, resource_size(&res)); if (!fec->fecp) { diff --git a/drivers/net/ethernet/fungible/funeth/funeth_rx.c b/drivers/net/ethernet/fungible/funeth/funeth_rx.c index 29a6c2ede43a..7e2584895de3 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_rx.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_rx.c @@ -323,9 +323,8 @@ static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len, if (ref_ok) ref_ok |= buf->node; - __skb_frag_set_page(frags, buf->page); - skb_frag_off_set(frags, q->buf_offset); - skb_frag_size_set(frags++, frag_len); + skb_frag_fill_page_desc(frags++, buf->page, q->buf_offset, + frag_len); tot_len -= frag_len; if (!tot_len) diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c index 706d81e39a54..8ddefd3ec15b 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_tx.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c @@ -348,8 +348,7 @@ netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned int tls_len = 0; unsigned int ndesc; - if (IS_ENABLED(CONFIG_TLS_DEVICE) && skb->sk && - tls_is_sk_tx_device_offloaded(skb->sk)) { + if (tls_is_skb_tx_device_offloaded(skb)) { skb = fun_tls_tx(skb, q, &tls_len); if (unlikely(!skb)) goto dropped; diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 98eb78d98e9f..4b425bf71ede 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -964,5 +964,6 @@ void gve_handle_report_stats(struct gve_priv *priv); /* exported by ethtool.c */ extern const struct ethtool_ops gve_ethtool_ops; /* needed by ethtool */ +extern char gve_driver_name[]; extern const char gve_version_str[]; #endif /* _GVE_H_ */ diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index cfd4b8d284d1..233e5946905e 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -15,7 +15,7 @@ static void gve_get_drvinfo(struct net_device *netdev, { struct gve_priv *priv = netdev_priv(netdev); - strscpy(info->driver, "gve", sizeof(info->driver)); + strscpy(info->driver, gve_driver_name, sizeof(info->driver)); strscpy(info->version, gve_version_str, sizeof(info->version)); strscpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info)); } @@ -590,6 +590,9 @@ static int gve_get_link_ksettings(struct net_device *netdev, err = gve_adminq_report_link_speed(priv); cmd->base.speed = priv->link_speed; + + cmd->base.duplex = DUPLEX_FULL; + return err; } diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index caa00c72aeeb..e6f1711d9be0 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -31,7 +31,9 @@ // Minimum amount of time between queue kicks in msec (10 seconds) #define MIN_TX_TIMEOUT_GAP (1000 * 10) +#define DQO_TX_MAX 0x3FFFF +char gve_driver_name[] = "gve"; const char gve_version_str[] = GVE_VERSION; static const char gve_version_prefix[] = GVE_VERSION_PREFIX; @@ -2047,6 +2049,10 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) goto err; } + /* Big TCP is only supported on DQ*/ + if (!gve_is_gqi(priv)) + netif_set_tso_max_size(priv->dev, DQO_TX_MAX); + priv->num_registered_pages = 0; priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; /* gvnic has one Notification Block per MSI-x vector, except for the @@ -2195,7 +2201,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - err = pci_request_regions(pdev, "gvnic-cfg"); + err = pci_request_regions(pdev, gve_driver_name); if (err) goto abort_with_enabled; @@ -2388,8 +2394,8 @@ static const struct pci_device_id gve_id_table[] = { { } }; -static struct pci_driver gvnic_driver = { - .name = "gvnic", +static struct pci_driver gve_driver = { + .name = gve_driver_name, .id_table = gve_id_table, .probe = gve_probe, .remove = gve_remove, @@ -2400,10 +2406,10 @@ static struct pci_driver gvnic_driver = { #endif }; -module_pci_driver(gvnic_driver); +module_pci_driver(gve_driver); MODULE_DEVICE_TABLE(pci, gve_id_table); MODULE_AUTHOR("Google, Inc."); -MODULE_DESCRIPTION("gVNIC Driver"); +MODULE_DESCRIPTION("Google Virtual NIC Driver"); MODULE_LICENSE("Dual MIT/GPL"); MODULE_VERSION(GVE_VERSION); diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index e57b73eb70f6..ac041cc5714c 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -492,7 +492,10 @@ static int gve_rx_append_frags(struct napi_struct *napi, if (!skb) return -1; - skb_shinfo(rx->ctx.skb_tail)->frag_list = skb; + if (rx->ctx.skb_tail == rx->ctx.skb_head) + skb_shinfo(rx->ctx.skb_head)->frag_list = skb; + else + rx->ctx.skb_tail->next = skb; rx->ctx.skb_tail = skb; num_frags = 0; } diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 813da572abca..6957a865cff3 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -248,7 +248,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) tx->mask = slots - 1; /* alloc metadata */ - tx->info = vzalloc(sizeof(*tx->info) * slots); + tx->info = vcalloc(slots, sizeof(*tx->info)); if (!tx->info) return -ENOMEM; diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index b76143bfd594..3c09e66ba1ab 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -8,6 +8,7 @@ #include "gve_adminq.h" #include "gve_utils.h" #include "gve_dqo.h" +#include <net/ip.h> #include <linux/tcp.h> #include <linux/slab.h> #include <linux/skbuff.h> @@ -646,6 +647,9 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx, goto drop; } + if (unlikely(ipv6_hopopt_jumbo_remove(skb))) + goto drop; + num_buffer_descs = gve_num_buffer_descs_needed(skb); } else { num_buffer_descs = gve_num_buffer_descs_needed(skb); diff --git a/drivers/net/ethernet/hisilicon/hns3/Makefile b/drivers/net/ethernet/hisilicon/hns3/Makefile index 6efea4662858..e214bfaece1f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/Makefile @@ -17,11 +17,11 @@ hns3-$(CONFIG_HNS3_DCB) += hns3_dcbnl.o obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o -hclgevf-objs = hns3vf/hclgevf_main.o hns3vf/hclgevf_mbx.o hns3vf/hclgevf_devlink.o \ +hclgevf-objs = hns3vf/hclgevf_main.o hns3vf/hclgevf_mbx.o hns3vf/hclgevf_devlink.o hns3vf/hclgevf_regs.o \ hns3_common/hclge_comm_cmd.o hns3_common/hclge_comm_rss.o hns3_common/hclge_comm_tqp_stats.o obj-$(CONFIG_HNS3_HCLGE) += hclge.o -hclge-objs = hns3pf/hclge_main.o hns3pf/hclge_mdio.o hns3pf/hclge_tm.o \ +hclge-objs = hns3pf/hclge_main.o hns3pf/hclge_mdio.o hns3pf/hclge_tm.o hns3pf/hclge_regs.o \ hns3pf/hclge_mbx.o hns3pf/hclge_err.o hns3pf/hclge_debugfs.o hns3pf/hclge_ptp.o hns3pf/hclge_devlink.o \ hns3_common/hclge_comm_cmd.o hns3_common/hclge_comm_rss.o hns3_common/hclge_comm_tqp_stats.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 9c9c72dc57e0..aaf1f42624a7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -31,6 +31,7 @@ #include <linux/pci.h> #include <linux/pkt_sched.h> #include <linux/types.h> +#include <linux/bitmap.h> #include <net/pkt_cls.h> #include <net/pkt_sched.h> @@ -101,6 +102,7 @@ enum HNAE3_DEV_CAP_BITS { HNAE3_DEV_SUPPORT_FEC_STATS_B, HNAE3_DEV_SUPPORT_LANE_NUM_B, HNAE3_DEV_SUPPORT_WOL_B, + HNAE3_DEV_SUPPORT_TM_FLUSH_B, }; #define hnae3_ae_dev_fd_supported(ae_dev) \ @@ -172,6 +174,9 @@ enum HNAE3_DEV_CAP_BITS { #define hnae3_ae_dev_wol_supported(ae_dev) \ test_bit(HNAE3_DEV_SUPPORT_WOL_B, (ae_dev)->caps) +#define hnae3_ae_dev_tm_flush_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_TM_FLUSH_B, (hdev)->ae_dev->caps) + enum HNAE3_PF_CAP_BITS { HNAE3_PF_SUPPORT_VLAN_FLTR_MDF_B = 0, }; @@ -377,6 +382,7 @@ struct hnae3_dev_specs { u16 umv_size; u16 mc_mac_size; u32 mac_stats_num; + u8 tnl_num; }; struct hnae3_client_ops { @@ -407,7 +413,7 @@ struct hnae3_ae_dev { unsigned long hw_err_reset_req; struct hnae3_dev_specs dev_specs; u32 dev_version; - unsigned long caps[BITS_TO_LONGS(HNAE3_DEV_CAPS_MAX_NUM)]; + DECLARE_BITMAP(caps, HNAE3_DEV_CAPS_MAX_NUM); void *priv; }; @@ -647,8 +653,7 @@ struct hnae3_ae_ops { int (*rm_mc_addr)(struct hnae3_handle *handle, const unsigned char *addr); void (*set_tso_stats)(struct hnae3_handle *handle, int enable); - void (*update_stats)(struct hnae3_handle *handle, - struct net_device_stats *net_stats); + void (*update_stats)(struct hnae3_handle *handle); void (*get_stats)(struct hnae3_handle *handle, u64 *data); void (*get_mac_stats)(struct hnae3_handle *handle, struct hns3_mac_stats *mac_stats); @@ -809,6 +814,7 @@ struct hnae3_tc_info { u8 max_tc; /* Total number of TCs */ u8 num_tc; /* Total number of enabled TCs */ bool mqprio_active; + bool dcb_ets_active; }; #define HNAE3_MAX_DSCP 64 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c index b85c412683dd..dcecb23daac6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c @@ -156,6 +156,7 @@ static const struct hclge_comm_caps_bit_map hclge_pf_cmd_caps[] = { {HCLGE_COMM_CAP_FEC_STATS_B, HNAE3_DEV_SUPPORT_FEC_STATS_B}, {HCLGE_COMM_CAP_LANE_NUM_B, HNAE3_DEV_SUPPORT_LANE_NUM_B}, {HCLGE_COMM_CAP_WOL_B, HNAE3_DEV_SUPPORT_WOL_B}, + {HCLGE_COMM_CAP_TM_FLUSH_B, HNAE3_DEV_SUPPORT_TM_FLUSH_B}, }; static const struct hclge_comm_caps_bit_map hclge_vf_cmd_caps[] = { @@ -172,6 +173,20 @@ static const struct hclge_comm_caps_bit_map hclge_vf_cmd_caps[] = { }; static void +hclge_comm_capability_to_bitmap(unsigned long *bitmap, __le32 *caps) +{ + const unsigned int words = HCLGE_COMM_QUERY_CAP_LENGTH; + u32 val[HCLGE_COMM_QUERY_CAP_LENGTH]; + unsigned int i; + + for (i = 0; i < words; i++) + val[i] = __le32_to_cpu(caps[i]); + + bitmap_from_arr32(bitmap, val, + HCLGE_COMM_QUERY_CAP_LENGTH * BITS_PER_TYPE(u32)); +} + +static void hclge_comm_parse_capability(struct hnae3_ae_dev *ae_dev, bool is_pf, struct hclge_comm_query_version_cmd *cmd) { @@ -179,11 +194,12 @@ hclge_comm_parse_capability(struct hnae3_ae_dev *ae_dev, bool is_pf, is_pf ? hclge_pf_cmd_caps : hclge_vf_cmd_caps; u32 size = is_pf ? ARRAY_SIZE(hclge_pf_cmd_caps) : ARRAY_SIZE(hclge_vf_cmd_caps); - u32 caps, i; + DECLARE_BITMAP(caps, HCLGE_COMM_QUERY_CAP_LENGTH * BITS_PER_TYPE(u32)); + u32 i; - caps = __le32_to_cpu(cmd->caps[0]); + hclge_comm_capability_to_bitmap(caps, cmd->caps); for (i = 0; i < size; i++) - if (hnae3_get_bit(caps, caps_map[i].imp_bit)) + if (test_bit(caps_map[i].imp_bit, caps)) set_bit(caps_map[i].local_bit, ae_dev->caps); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h index 18f1b4bf362d..2b7197ce0ae8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h @@ -153,6 +153,7 @@ enum hclge_opcode_type { HCLGE_OPC_TM_INTERNAL_STS = 0x0850, HCLGE_OPC_TM_INTERNAL_CNT = 0x0851, HCLGE_OPC_TM_INTERNAL_STS_1 = 0x0852, + HCLGE_OPC_TM_FLUSH = 0x0872, /* Packet buffer allocate commands */ HCLGE_OPC_TX_BUFF_ALLOC = 0x0901, @@ -349,6 +350,7 @@ enum HCLGE_COMM_CAP_BITS { HCLGE_COMM_CAP_FEC_STATS_B = 25, HCLGE_COMM_CAP_LANE_NUM_B = 27, HCLGE_COMM_CAP_WOL_B = 28, + HCLGE_COMM_CAP_TM_FLUSH_B = 31, }; enum HCLGE_COMM_API_CAP_BITS { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c index ae2736549526..b4ae2160aff4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c @@ -305,8 +305,7 @@ int hclge_comm_set_rss_indir_table(struct hnae3_ae_dev *ae_dev, return 0; } -int hclge_comm_set_rss_input_tuple(struct hnae3_handle *nic, - struct hclge_comm_hw *hw, bool is_pf, +int hclge_comm_set_rss_input_tuple(struct hclge_comm_hw *hw, struct hclge_comm_rss_cfg *rss_cfg) { struct hclge_comm_rss_input_tuple_cmd *req; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h index 92af3d2980d3..cdafa63fe38b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h @@ -112,8 +112,7 @@ int hclge_comm_init_rss_tuple_cmd(struct hclge_comm_rss_cfg *rss_cfg, struct hnae3_ae_dev *ae_dev, struct hclge_comm_rss_input_tuple_cmd *req); u64 hclge_comm_convert_rss_tuple(u8 tuple_sets); -int hclge_comm_set_rss_input_tuple(struct hnae3_handle *nic, - struct hclge_comm_hw *hw, bool is_pf, +int hclge_comm_set_rss_input_tuple(struct hclge_comm_hw *hw, struct hclge_comm_rss_cfg *rss_cfg); int hclge_comm_set_rss_indir_table(struct hnae3_ae_dev *ae_dev, struct hclge_comm_hw *hw, const u16 *indir); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index d385ffc21876..26fb6fefcb9d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -411,6 +411,9 @@ static struct hns3_dbg_cap_info hns3_dbg_cap[] = { }, { .name = "support wake on lan", .cap_bit = HNAE3_DEV_SUPPORT_WOL_B, + }, { + .name = "support tm flush", + .cap_bit = HNAE3_DEV_SUPPORT_TM_FLUSH_B, } }; @@ -438,19 +441,36 @@ static void hns3_dbg_fill_content(char *content, u16 len, const struct hns3_dbg_item *items, const char **result, u16 size) { +#define HNS3_DBG_LINE_END_LEN 2 char *pos = content; + u16 item_len; u16 i; + if (!len) { + return; + } else if (len <= HNS3_DBG_LINE_END_LEN) { + *pos++ = '\0'; + return; + } + memset(content, ' ', len); - for (i = 0; i < size; i++) { - if (result) - strncpy(pos, result[i], strlen(result[i])); - else - strncpy(pos, items[i].name, strlen(items[i].name)); + len -= HNS3_DBG_LINE_END_LEN; - pos += strlen(items[i].name) + items[i].interval; + for (i = 0; i < size; i++) { + item_len = strlen(items[i].name) + items[i].interval; + if (len < item_len) + break; + + if (result) { + if (item_len < strlen(result[i])) + break; + memcpy(pos, result[i], strlen(result[i])); + } else { + memcpy(pos, items[i].name, strlen(items[i].name)); + } + pos += item_len; + len -= item_len; } - *pos++ = '\n'; *pos++ = '\0'; } @@ -941,8 +961,7 @@ static const struct hns3_dbg_item tx_bd_info_items[] = { { "MSS_HW_CSUM", 0 }, }; -static void hns3_dump_tx_bd_info(struct hns3_nic_priv *priv, - struct hns3_desc *desc, char **result, int idx) +static void hns3_dump_tx_bd_info(struct hns3_desc *desc, char **result, int idx) { unsigned int j = 0; @@ -991,7 +1010,7 @@ static int hns3_dbg_tx_bd_info(struct hns3_dbg_data *d, char *buf, int len) for (i = 0; i < ring->desc_num; i++) { desc = &ring->desc[i]; - hns3_dump_tx_bd_info(priv, desc, result, i); + hns3_dump_tx_bd_info(desc, result, i); hns3_dbg_fill_content(content, sizeof(content), tx_bd_info_items, (const char **)result, ARRAY_SIZE(tx_bd_info_items)); @@ -1392,9 +1411,9 @@ int hns3_dbg_init(struct hnae3_handle *handle) return 0; out: - mutex_destroy(&handle->dbgfs_lock); debugfs_remove_recursive(handle->hnae3_dbgfs); handle->hnae3_dbgfs = NULL; + mutex_destroy(&handle->dbgfs_lock); return ret; } @@ -1402,6 +1421,9 @@ void hns3_dbg_uninit(struct hnae3_handle *handle) { u32 i; + debugfs_remove_recursive(handle->hnae3_dbgfs); + handle->hnae3_dbgfs = NULL; + for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) if (handle->dbgfs_buf[i]) { kvfree(handle->dbgfs_buf[i]); @@ -1409,8 +1431,6 @@ void hns3_dbg_uninit(struct hnae3_handle *handle) } mutex_destroy(&handle->dbgfs_lock); - debugfs_remove_recursive(handle->hnae3_dbgfs); - handle->hnae3_dbgfs = NULL; } void hns3_dbg_register_debugfs(const char *debugfs_dir_name) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index b676496ec6d7..613d0a779cef 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2102,8 +2102,12 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, */ if (test_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state) && num && !ring->pending_buf && num <= HNS3_MAX_PUSH_BD_NUM && doorbell) { + /* This smp_store_release() pairs with smp_load_aquire() in + * hns3_nic_reclaim_desc(). Ensure that the BD valid bit + * is updated. + */ + smp_store_release(&ring->last_to_use, ring->next_to_use); hns3_tx_push_bd(ring, num); - WRITE_ONCE(ring->last_to_use, ring->next_to_use); return; } @@ -2114,6 +2118,11 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, return; } + /* This smp_store_release() pairs with smp_load_aquire() in + * hns3_nic_reclaim_desc(). Ensure that the BD valid bit is updated. + */ + smp_store_release(&ring->last_to_use, ring->next_to_use); + if (ring->tqp->mem_base) hns3_tx_mem_doorbell(ring); else @@ -2121,7 +2130,6 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG); ring->pending_buf = 0; - WRITE_ONCE(ring->last_to_use, ring->next_to_use); } static void hns3_tsyn(struct net_device *netdev, struct sk_buff *skb, @@ -2538,7 +2546,7 @@ static void hns3_nic_get_stats64(struct net_device *netdev, if (test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) return; - handle->ae_algo->ops->update_stats(handle, &netdev->stats); + handle->ae_algo->ops->update_stats(handle); memset(&ring_total_stats, 0, sizeof(ring_total_stats)); for (idx = 0; idx < queue_num; idx++) { @@ -3307,8 +3315,6 @@ static void hns3_set_default_feature(struct net_device *netdev) netdev->priv_flags |= IFF_UNICAST_FLT; - netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; - netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | @@ -3562,9 +3568,8 @@ static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i) static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, int *bytes, int *pkts, int budget) { - /* pair with ring->last_to_use update in hns3_tx_doorbell(), - * smp_store_release() is not used in hns3_tx_doorbell() because - * the doorbell operation already have the needed barrier operation. + /* This smp_load_acquire() pairs with smp_store_release() in + * hns3_tx_doorbell(). */ int ltu = smp_load_acquire(&ring->last_to_use); int ntc = ring->next_to_clean; @@ -5854,6 +5859,9 @@ void hns3_external_lb_prepare(struct net_device *ndev, bool if_running) if (!if_running) return; + if (test_and_set_bit(HNS3_NIC_STATE_DOWN, &priv->state)) + return; + netif_carrier_off(ndev); netif_tx_disable(ndev); @@ -5882,7 +5890,16 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running) if (!if_running) return; - hns3_nic_reset_all_ring(priv->ae_handle); + if (hns3_nic_resetting(ndev)) + return; + + if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) + return; + + if (hns3_nic_reset_all_ring(priv->ae_handle)) + return; + + clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); for (i = 0; i < priv->vector_num; i++) hns3_vector_enable(&priv->tqp_vector[i]); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 51d1278b18f6..64858b3114ac 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -228,7 +228,7 @@ static u32 hns3_lb_check_rx_ring(struct hns3_nic_priv *priv, u32 budget) } static void hns3_lb_clear_tx_ring(struct hns3_nic_priv *priv, u32 start_ringid, - u32 end_ringid, u32 budget) + u32 end_ringid) { u32 i; @@ -295,8 +295,7 @@ static int hns3_lp_run_test(struct net_device *ndev, enum hnae3_loop mode) out: hns3_lb_clear_tx_ring(priv, HNS3_NIC_LB_TEST_RING_ID, - HNS3_NIC_LB_TEST_RING_ID, - HNS3_NIC_LB_TEST_PKT_NUM); + HNS3_NIC_LB_TEST_RING_ID); kfree_skb(skb); return ret_val; @@ -618,7 +617,7 @@ static void hns3_get_stats(struct net_device *netdev, return; } - h->ae_algo->ops->update_stats(h, &netdev->stats); + h->ae_algo->ops->update_stats(h); /* get per-queue stats */ p = hns3_get_stats_tqps(h, p); @@ -774,7 +773,9 @@ static int hns3_get_link_ksettings(struct net_device *netdev, hns3_get_ksettings(h, cmd); break; case HNAE3_MEDIA_TYPE_FIBER: - if (module_type == HNAE3_MODULE_TYPE_CR) + if (module_type == HNAE3_MODULE_TYPE_UNKNOWN) + cmd->base.port = PORT_OTHER; + else if (module_type == HNAE3_MODULE_TYPE_CR) cmd->base.port = PORT_DA; else cmd->base.port = PORT_FIBRE; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 91c173f40701..d5cfdc4c082d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -826,7 +826,9 @@ struct hclge_dev_specs_1_cmd { u8 rsv0[2]; __le16 umv_size; __le16 mc_mac_size; - u8 rsv1[12]; + u8 rsv1[6]; + u8 tnl_num; + u8 rsv2[5]; }; /* mac speed type defined in firmware command */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index c4aded65e848..b98301e205f7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -52,7 +52,10 @@ static void hclge_tm_info_to_ieee_ets(struct hclge_dev *hdev, for (i = 0; i < HNAE3_MAX_TC; i++) { ets->prio_tc[i] = hdev->tm_info.prio_tc[i]; - ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i]; + if (i < hdev->tm_info.num_tc) + ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i]; + else + ets->tc_tx_bw[i] = 0; if (hdev->tm_info.tc_info[i].tc_sch_mode == HCLGE_SCH_MODE_SP) @@ -123,7 +126,8 @@ static u8 hclge_ets_tc_changed(struct hclge_dev *hdev, struct ieee_ets *ets, } static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev, - struct ieee_ets *ets, bool *changed) + struct ieee_ets *ets, bool *changed, + u8 tc_num) { bool has_ets_tc = false; u32 total_ets_bw = 0; @@ -137,6 +141,13 @@ static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev, *changed = true; break; case IEEE_8021QAZ_TSA_ETS: + if (i >= tc_num) { + dev_err(&hdev->pdev->dev, + "tc%u is disabled, cannot set ets bw\n", + i); + return -EINVAL; + } + /* The hardware will switch to sp mode if bandwidth is * 0, so limit ets bandwidth must be greater than 0. */ @@ -176,7 +187,7 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, if (ret) return ret; - ret = hclge_ets_sch_mode_validate(hdev, ets, changed); + ret = hclge_ets_sch_mode_validate(hdev, ets, changed, tc_num); if (ret) return ret; @@ -216,6 +227,10 @@ static int hclge_notify_down_uinit(struct hclge_dev *hdev) if (ret) return ret; + ret = hclge_tm_flush_cfg(hdev, true); + if (ret) + return ret; + return hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT); } @@ -227,6 +242,10 @@ static int hclge_notify_init_up(struct hclge_dev *hdev) if (ret) return ret; + ret = hclge_tm_flush_cfg(hdev, false); + if (ret) + return ret; + return hclge_notify_client(hdev, HNAE3_UP_CLIENT); } @@ -240,7 +259,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) int ret; if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || - hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) + h->kinfo.tc_info.mqprio_active) return -EINVAL; ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed); @@ -256,10 +275,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) } hclge_tm_schd_info_update(hdev, num_tc); - if (num_tc > 1) - hdev->flag |= HCLGE_FLAG_DCB_ENABLE; - else - hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; + h->kinfo.tc_info.dcb_ets_active = num_tc > 1; ret = hclge_ieee_ets_to_tm_info(hdev, ets); if (ret) @@ -313,6 +329,7 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) struct net_device *netdev = h->kinfo.netdev; struct hclge_dev *hdev = vport->back; u8 i, j, pfc_map, *prio_tc; + int last_bad_ret = 0; int ret; if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) @@ -350,13 +367,28 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) if (ret) return ret; - ret = hclge_buffer_alloc(hdev); - if (ret) { - hclge_notify_client(hdev, HNAE3_UP_CLIENT); + ret = hclge_tm_flush_cfg(hdev, true); + if (ret) return ret; - } - return hclge_notify_client(hdev, HNAE3_UP_CLIENT); + /* No matter whether the following operations are performed + * successfully or not, disabling the tm flush and notify + * the network status to up are necessary. + * Do not return immediately. + */ + ret = hclge_buffer_alloc(hdev); + if (ret) + last_bad_ret = ret; + + ret = hclge_tm_flush_cfg(hdev, false); + if (ret) + last_bad_ret = ret; + + ret = hclge_notify_client(hdev, HNAE3_UP_CLIENT); + if (ret) + last_bad_ret = ret; + + return last_bad_ret; } static int hclge_ieee_setapp(struct hnae3_handle *h, struct dcb_app *app) @@ -452,7 +484,7 @@ static u8 hclge_getdcbx(struct hnae3_handle *h) struct hclge_vport *vport = hclge_get_vport(h); struct hclge_dev *hdev = vport->back; - if (hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) + if (h->kinfo.tc_info.mqprio_active) return 0; return hdev->dcbx_cap; @@ -576,7 +608,8 @@ static int hclge_setup_tc(struct hnae3_handle *h, if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state)) return -EBUSY; - if (hdev->flag & HCLGE_FLAG_DCB_ENABLE) + kinfo = &vport->nic.kinfo; + if (kinfo->tc_info.dcb_ets_active) return -EINVAL; ret = hclge_mqprio_qopt_check(hdev, mqprio_qopt); @@ -590,7 +623,6 @@ static int hclge_setup_tc(struct hnae3_handle *h, if (ret) return ret; - kinfo = &vport->nic.kinfo; memcpy(&old_tc_info, &kinfo->tc_info, sizeof(old_tc_info)); hclge_sync_mqprio_qopt(&kinfo->tc_info, mqprio_qopt); kinfo->tc_info.mqprio_active = tc > 0; @@ -599,13 +631,6 @@ static int hclge_setup_tc(struct hnae3_handle *h, if (ret) goto err_out; - hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; - - if (tc > 1) - hdev->flag |= HCLGE_FLAG_MQPRIO_ENABLE; - else - hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE; - return hclge_notify_init_up(hdev); err_out: diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index a0b46e7d863e..ff3f8f424ad9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -7,6 +7,7 @@ #include "hclge_debugfs.h" #include "hclge_err.h" #include "hclge_main.h" +#include "hclge_regs.h" #include "hclge_tm.h" #include "hnae3.h" @@ -88,16 +89,35 @@ static void hclge_dbg_fill_content(char *content, u16 len, const struct hclge_dbg_item *items, const char **result, u16 size) { +#define HCLGE_DBG_LINE_END_LEN 2 char *pos = content; + u16 item_len; u16 i; + if (!len) { + return; + } else if (len <= HCLGE_DBG_LINE_END_LEN) { + *pos++ = '\0'; + return; + } + memset(content, ' ', len); + len -= HCLGE_DBG_LINE_END_LEN; + for (i = 0; i < size; i++) { - if (result) - strncpy(pos, result[i], strlen(result[i])); - else - strncpy(pos, items[i].name, strlen(items[i].name)); - pos += strlen(items[i].name) + items[i].interval; + item_len = strlen(items[i].name) + items[i].interval; + if (len < item_len) + break; + + if (result) { + if (item_len < strlen(result[i])) + break; + memcpy(pos, result[i], strlen(result[i])); + } else { + memcpy(pos, items[i].name, strlen(items[i].name)); + } + pos += item_len; + len -= item_len; } *pos++ = '\n'; *pos++ = '\0'; @@ -674,8 +694,7 @@ static int hclge_dbg_dump_tc(struct hclge_dev *hdev, char *buf, int len) for (i = 0; i < HNAE3_MAX_TC; i++) { sch_mode_str = ets_weight->tc_weight[i] ? "dwrr" : "sp"; pos += scnprintf(buf + pos, len - pos, "%u %4s %3u\n", - i, sch_mode_str, - hdev->tm_info.pg_info[0].tc_dwrr[i]); + i, sch_mode_str, ets_weight->tc_weight[i]); } return 0; @@ -1500,7 +1519,7 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x, struct hclge_desc desc[3]; int pos = 0; int ret, i; - u32 *req; + __le32 *req; hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_FD_TCAM_OP, true); desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); @@ -1525,22 +1544,22 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x, tcam_msg.loc); /* tcam_data0 ~ tcam_data1 */ - req = (u32 *)req1->tcam_data; + req = (__le32 *)req1->tcam_data; for (i = 0; i < 2; i++) pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos, - "%08x\n", *req++); + "%08x\n", le32_to_cpu(*req++)); /* tcam_data2 ~ tcam_data7 */ - req = (u32 *)req2->tcam_data; + req = (__le32 *)req2->tcam_data; for (i = 0; i < 6; i++) pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos, - "%08x\n", *req++); + "%08x\n", le32_to_cpu(*req++)); /* tcam_data8 ~ tcam_data12 */ - req = (u32 *)req3->tcam_data; + req = (__le32 *)req3->tcam_data; for (i = 0; i < 5; i++) pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos, - "%08x\n", *req++); + "%08x\n", le32_to_cpu(*req++)); return ret; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 2689b108f7df..ce6b658a930c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -20,6 +20,7 @@ #include "hclge_main.h" #include "hclge_mbx.h" #include "hclge_mdio.h" +#include "hclge_regs.h" #include "hclge_tm.h" #include "hclge_err.h" #include "hnae3.h" @@ -40,20 +41,6 @@ #define HCLGE_PF_RESET_SYNC_TIME 20 #define HCLGE_PF_RESET_SYNC_CNT 1500 -/* Get DFX BD number offset */ -#define HCLGE_DFX_BIOS_BD_OFFSET 1 -#define HCLGE_DFX_SSU_0_BD_OFFSET 2 -#define HCLGE_DFX_SSU_1_BD_OFFSET 3 -#define HCLGE_DFX_IGU_BD_OFFSET 4 -#define HCLGE_DFX_RPU_0_BD_OFFSET 5 -#define HCLGE_DFX_RPU_1_BD_OFFSET 6 -#define HCLGE_DFX_NCSI_BD_OFFSET 7 -#define HCLGE_DFX_RTC_BD_OFFSET 8 -#define HCLGE_DFX_PPP_BD_OFFSET 9 -#define HCLGE_DFX_RCB_BD_OFFSET 10 -#define HCLGE_DFX_TQP_BD_OFFSET 11 -#define HCLGE_DFX_SSU_2_BD_OFFSET 12 - #define HCLGE_LINK_STATUS_MS 10 static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps); @@ -72,6 +59,8 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev); static void hclge_sync_promisc_mode(struct hclge_dev *hdev); static void hclge_sync_fd_table(struct hclge_dev *hdev); static void hclge_update_fec_stats(struct hclge_dev *hdev); +static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret, + int wait_cnt); static struct hnae3_ae_algo ae_algo; @@ -92,62 +81,6 @@ static const struct pci_device_id ae_algo_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, ae_algo_pci_tbl); -static const u32 cmdq_reg_addr_list[] = {HCLGE_COMM_NIC_CSQ_BASEADDR_L_REG, - HCLGE_COMM_NIC_CSQ_BASEADDR_H_REG, - HCLGE_COMM_NIC_CSQ_DEPTH_REG, - HCLGE_COMM_NIC_CSQ_TAIL_REG, - HCLGE_COMM_NIC_CSQ_HEAD_REG, - HCLGE_COMM_NIC_CRQ_BASEADDR_L_REG, - HCLGE_COMM_NIC_CRQ_BASEADDR_H_REG, - HCLGE_COMM_NIC_CRQ_DEPTH_REG, - HCLGE_COMM_NIC_CRQ_TAIL_REG, - HCLGE_COMM_NIC_CRQ_HEAD_REG, - HCLGE_COMM_VECTOR0_CMDQ_SRC_REG, - HCLGE_COMM_CMDQ_INTR_STS_REG, - HCLGE_COMM_CMDQ_INTR_EN_REG, - HCLGE_COMM_CMDQ_INTR_GEN_REG}; - -static const u32 common_reg_addr_list[] = {HCLGE_MISC_VECTOR_REG_BASE, - HCLGE_PF_OTHER_INT_REG, - HCLGE_MISC_RESET_STS_REG, - HCLGE_MISC_VECTOR_INT_STS, - HCLGE_GLOBAL_RESET_REG, - HCLGE_FUN_RST_ING, - HCLGE_GRO_EN_REG}; - -static const u32 ring_reg_addr_list[] = {HCLGE_RING_RX_ADDR_L_REG, - HCLGE_RING_RX_ADDR_H_REG, - HCLGE_RING_RX_BD_NUM_REG, - HCLGE_RING_RX_BD_LENGTH_REG, - HCLGE_RING_RX_MERGE_EN_REG, - HCLGE_RING_RX_TAIL_REG, - HCLGE_RING_RX_HEAD_REG, - HCLGE_RING_RX_FBD_NUM_REG, - HCLGE_RING_RX_OFFSET_REG, - HCLGE_RING_RX_FBD_OFFSET_REG, - HCLGE_RING_RX_STASH_REG, - HCLGE_RING_RX_BD_ERR_REG, - HCLGE_RING_TX_ADDR_L_REG, - HCLGE_RING_TX_ADDR_H_REG, - HCLGE_RING_TX_BD_NUM_REG, - HCLGE_RING_TX_PRIORITY_REG, - HCLGE_RING_TX_TC_REG, - HCLGE_RING_TX_MERGE_EN_REG, - HCLGE_RING_TX_TAIL_REG, - HCLGE_RING_TX_HEAD_REG, - HCLGE_RING_TX_FBD_NUM_REG, - HCLGE_RING_TX_OFFSET_REG, - HCLGE_RING_TX_EBD_NUM_REG, - HCLGE_RING_TX_EBD_OFFSET_REG, - HCLGE_RING_TX_BD_ERR_REG, - HCLGE_RING_EN_REG}; - -static const u32 tqp_intr_reg_addr_list[] = {HCLGE_TQP_INTR_CTRL_REG, - HCLGE_TQP_INTR_GL0_REG, - HCLGE_TQP_INTR_GL1_REG, - HCLGE_TQP_INTR_GL2_REG, - HCLGE_TQP_INTR_RL_REG}; - static const char hns3_nic_test_strs[][ETH_GSTRING_LEN] = { "External Loopback test", "App Loopback test", @@ -373,36 +306,6 @@ static const struct hclge_mac_mgr_tbl_entry_cmd hclge_mgr_table[] = { }, }; -static const u32 hclge_dfx_bd_offset_list[] = { - HCLGE_DFX_BIOS_BD_OFFSET, - HCLGE_DFX_SSU_0_BD_OFFSET, - HCLGE_DFX_SSU_1_BD_OFFSET, - HCLGE_DFX_IGU_BD_OFFSET, - HCLGE_DFX_RPU_0_BD_OFFSET, - HCLGE_DFX_RPU_1_BD_OFFSET, - HCLGE_DFX_NCSI_BD_OFFSET, - HCLGE_DFX_RTC_BD_OFFSET, - HCLGE_DFX_PPP_BD_OFFSET, - HCLGE_DFX_RCB_BD_OFFSET, - HCLGE_DFX_TQP_BD_OFFSET, - HCLGE_DFX_SSU_2_BD_OFFSET -}; - -static const enum hclge_opcode_type hclge_dfx_reg_opcode_list[] = { - HCLGE_OPC_DFX_BIOS_COMMON_REG, - HCLGE_OPC_DFX_SSU_REG_0, - HCLGE_OPC_DFX_SSU_REG_1, - HCLGE_OPC_DFX_IGU_EGU_REG, - HCLGE_OPC_DFX_RPU_REG_0, - HCLGE_OPC_DFX_RPU_REG_1, - HCLGE_OPC_DFX_NCSI_REG, - HCLGE_OPC_DFX_RTC_REG, - HCLGE_OPC_DFX_PPP_REG, - HCLGE_OPC_DFX_RCB_REG, - HCLGE_OPC_DFX_TQP_REG, - HCLGE_OPC_DFX_SSU_REG_2 -}; - static const struct key_info meta_data_key_info[] = { { PACKET_TYPE_ID, 6 }, { IP_FRAGEMENT, 1 }, @@ -689,8 +592,7 @@ static void hclge_update_stats_for_all(struct hclge_dev *hdev) "Update MAC stats fail, status = %d.\n", status); } -static void hclge_update_stats(struct hnae3_handle *handle, - struct net_device_stats *net_stats) +static void hclge_update_stats(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; @@ -824,7 +726,7 @@ static void hclge_get_mac_stat(struct hnae3_handle *handle, struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - hclge_update_stats(handle, NULL); + hclge_update_stats(handle); mac_stats->tx_pause_cnt = hdev->mac_stats.mac_tx_mac_pause_num; mac_stats->rx_pause_cnt = hdev->mac_stats.mac_rx_mac_pause_num; @@ -1424,6 +1326,7 @@ static void hclge_set_default_dev_specs(struct hclge_dev *hdev) ae_dev->dev_specs.max_frm_size = HCLGE_MAC_MAX_FRAME; ae_dev->dev_specs.max_qset_num = HCLGE_MAX_QSET_NUM; ae_dev->dev_specs.umv_size = HCLGE_DEFAULT_UMV_SPACE_PER_PF; + ae_dev->dev_specs.tnl_num = 0; } static void hclge_parse_dev_specs(struct hclge_dev *hdev, @@ -1447,6 +1350,7 @@ static void hclge_parse_dev_specs(struct hclge_dev *hdev, ae_dev->dev_specs.max_frm_size = le16_to_cpu(req1->max_frm_size); ae_dev->dev_specs.umv_size = le16_to_cpu(req1->umv_size); ae_dev->dev_specs.mc_mac_size = le16_to_cpu(req1->mc_mac_size); + ae_dev->dev_specs.tnl_num = req1->tnl_num; } static void hclge_check_dev_specs(struct hclge_dev *hdev) @@ -4965,9 +4869,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev) if (ret) return ret; - ret = hclge_comm_set_rss_input_tuple(&hdev->vport[0].nic, - &hdev->hw.hw, true, - &hdev->rss_cfg); + ret = hclge_comm_set_rss_input_tuple(&hdev->hw.hw, &hdev->rss_cfg); if (ret) return ret; @@ -6243,8 +6145,7 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev, return hclge_fd_check_ext_tuple(hdev, fs, unused_tuple); } -static void hclge_fd_get_tcpip4_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, +static void hclge_fd_get_tcpip4_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule, u8 ip_proto) { rule->tuples.src_ip[IPV4_INDEX] = @@ -6273,8 +6174,7 @@ static void hclge_fd_get_tcpip4_tuple(struct hclge_dev *hdev, rule->tuples_mask.ip_proto = 0xFF; } -static void hclge_fd_get_ip4_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, +static void hclge_fd_get_ip4_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule) { rule->tuples.src_ip[IPV4_INDEX] = @@ -6297,8 +6197,7 @@ static void hclge_fd_get_ip4_tuple(struct hclge_dev *hdev, rule->tuples_mask.ether_proto = 0xFFFF; } -static void hclge_fd_get_tcpip6_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, +static void hclge_fd_get_tcpip6_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule, u8 ip_proto) { be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.tcp_ip6_spec.ip6src, @@ -6327,8 +6226,7 @@ static void hclge_fd_get_tcpip6_tuple(struct hclge_dev *hdev, rule->tuples_mask.ip_proto = 0xFF; } -static void hclge_fd_get_ip6_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, +static void hclge_fd_get_ip6_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule) { be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.usr_ip6_spec.ip6src, @@ -6351,8 +6249,7 @@ static void hclge_fd_get_ip6_tuple(struct hclge_dev *hdev, rule->tuples_mask.ether_proto = 0xFFFF; } -static void hclge_fd_get_ether_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, +static void hclge_fd_get_ether_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule) { ether_addr_copy(rule->tuples.src_mac, fs->h_u.ether_spec.h_source); @@ -6388,8 +6285,7 @@ static void hclge_fd_get_user_def_tuple(struct hclge_fd_user_def_info *info, rule->ep.user_def = *info; } -static int hclge_fd_get_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, +static int hclge_fd_get_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule, struct hclge_fd_user_def_info *info) { @@ -6397,31 +6293,31 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev, switch (flow_type) { case SCTP_V4_FLOW: - hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_SCTP); + hclge_fd_get_tcpip4_tuple(fs, rule, IPPROTO_SCTP); break; case TCP_V4_FLOW: - hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_TCP); + hclge_fd_get_tcpip4_tuple(fs, rule, IPPROTO_TCP); break; case UDP_V4_FLOW: - hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_UDP); + hclge_fd_get_tcpip4_tuple(fs, rule, IPPROTO_UDP); break; case IP_USER_FLOW: - hclge_fd_get_ip4_tuple(hdev, fs, rule); + hclge_fd_get_ip4_tuple(fs, rule); break; case SCTP_V6_FLOW: - hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_SCTP); + hclge_fd_get_tcpip6_tuple(fs, rule, IPPROTO_SCTP); break; case TCP_V6_FLOW: - hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_TCP); + hclge_fd_get_tcpip6_tuple(fs, rule, IPPROTO_TCP); break; case UDP_V6_FLOW: - hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_UDP); + hclge_fd_get_tcpip6_tuple(fs, rule, IPPROTO_UDP); break; case IPV6_USER_FLOW: - hclge_fd_get_ip6_tuple(hdev, fs, rule); + hclge_fd_get_ip6_tuple(fs, rule); break; case ETHER_FLOW: - hclge_fd_get_ether_tuple(hdev, fs, rule); + hclge_fd_get_ether_tuple(fs, rule); break; default: return -EOPNOTSUPP; @@ -6578,7 +6474,7 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, if (!rule) return -ENOMEM; - ret = hclge_fd_get_tuple(hdev, fs, rule, &info); + ret = hclge_fd_get_tuple(fs, rule, &info); if (ret) { kfree(rule); return ret; @@ -7567,6 +7463,8 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable) static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) { +#define HCLGE_LINK_STATUS_WAIT_CNT 3 + struct hclge_desc desc; struct hclge_config_mac_mode_cmd *req = (struct hclge_config_mac_mode_cmd *)desc.data; @@ -7591,9 +7489,15 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en); ret = hclge_cmd_send(&hdev->hw, &desc, 1); - if (ret) + if (ret) { dev_err(&hdev->pdev->dev, "mac enable fail, ret =%d.\n", ret); + return; + } + + if (!enable) + hclge_mac_link_status_wait(hdev, HCLGE_LINK_STATUS_DOWN, + HCLGE_LINK_STATUS_WAIT_CNT); } static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid, @@ -7656,10 +7560,9 @@ static void hclge_phy_link_status_wait(struct hclge_dev *hdev, } while (++i < HCLGE_PHY_LINK_STATUS_NUM); } -static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret) +static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret, + int wait_cnt) { -#define HCLGE_MAC_LINK_STATUS_NUM 100 - int link_status; int i = 0; int ret; @@ -7672,13 +7575,15 @@ static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret) return 0; msleep(HCLGE_LINK_STATUS_MS); - } while (++i < HCLGE_MAC_LINK_STATUS_NUM); + } while (++i < wait_cnt); return -EBUSY; } static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en, bool is_phy) { +#define HCLGE_MAC_LINK_STATUS_NUM 100 + int link_ret; link_ret = en ? HCLGE_LINK_STATUS_UP : HCLGE_LINK_STATUS_DOWN; @@ -7686,7 +7591,8 @@ static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en, if (is_phy) hclge_phy_link_status_wait(hdev, link_ret); - return hclge_mac_link_status_wait(hdev, link_ret); + return hclge_mac_link_status_wait(hdev, link_ret, + HCLGE_MAC_LINK_STATUS_NUM); } static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en) @@ -10924,9 +10830,12 @@ int hclge_cfg_flowctrl(struct hclge_dev *hdev) u32 rx_pause, tx_pause; u8 flowctl; - if (!phydev->link || !phydev->autoneg) + if (!phydev->link) return 0; + if (!phydev->autoneg) + return hclge_mac_pause_setup_hw(hdev); + local_advertising = linkmode_adv_to_lcl_adv_t(phydev->advertising); if (phydev->pause) @@ -11117,6 +11026,7 @@ static void hclge_get_mdix_mode(struct hnae3_handle *handle, static void hclge_info_show(struct hclge_dev *hdev) { + struct hnae3_handle *handle = &hdev->vport->nic; struct device *dev = &hdev->pdev->dev; dev_info(dev, "PF info begin:\n"); @@ -11133,9 +11043,9 @@ static void hclge_info_show(struct hclge_dev *hdev) dev_info(dev, "This is %s PF\n", hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main"); dev_info(dev, "DCB %s\n", - hdev->flag & HCLGE_FLAG_DCB_ENABLE ? "enable" : "disable"); + handle->kinfo.tc_info.dcb_ets_active ? "enable" : "disable"); dev_info(dev, "MQPRIO %s\n", - hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE ? "enable" : "disable"); + handle->kinfo.tc_info.mqprio_active ? "enable" : "disable"); dev_info(dev, "Default tx spare buffer size: %u\n", hdev->tx_spare_buf_size); @@ -12377,463 +12287,6 @@ out: return ret; } -static int hclge_get_regs_num(struct hclge_dev *hdev, u32 *regs_num_32_bit, - u32 *regs_num_64_bit) -{ - struct hclge_desc desc; - u32 total_num; - int ret; - - hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_REG_NUM, true); - ret = hclge_cmd_send(&hdev->hw, &desc, 1); - if (ret) { - dev_err(&hdev->pdev->dev, - "Query register number cmd failed, ret = %d.\n", ret); - return ret; - } - - *regs_num_32_bit = le32_to_cpu(desc.data[0]); - *regs_num_64_bit = le32_to_cpu(desc.data[1]); - - total_num = *regs_num_32_bit + *regs_num_64_bit; - if (!total_num) - return -EINVAL; - - return 0; -} - -static int hclge_get_32_bit_regs(struct hclge_dev *hdev, u32 regs_num, - void *data) -{ -#define HCLGE_32_BIT_REG_RTN_DATANUM 8 -#define HCLGE_32_BIT_DESC_NODATA_LEN 2 - - struct hclge_desc *desc; - u32 *reg_val = data; - __le32 *desc_data; - int nodata_num; - int cmd_num; - int i, k, n; - int ret; - - if (regs_num == 0) - return 0; - - nodata_num = HCLGE_32_BIT_DESC_NODATA_LEN; - cmd_num = DIV_ROUND_UP(regs_num + nodata_num, - HCLGE_32_BIT_REG_RTN_DATANUM); - desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL); - if (!desc) - return -ENOMEM; - - hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_32_BIT_REG, true); - ret = hclge_cmd_send(&hdev->hw, desc, cmd_num); - if (ret) { - dev_err(&hdev->pdev->dev, - "Query 32 bit register cmd failed, ret = %d.\n", ret); - kfree(desc); - return ret; - } - - for (i = 0; i < cmd_num; i++) { - if (i == 0) { - desc_data = (__le32 *)(&desc[i].data[0]); - n = HCLGE_32_BIT_REG_RTN_DATANUM - nodata_num; - } else { - desc_data = (__le32 *)(&desc[i]); - n = HCLGE_32_BIT_REG_RTN_DATANUM; - } - for (k = 0; k < n; k++) { - *reg_val++ = le32_to_cpu(*desc_data++); - - regs_num--; - if (!regs_num) - break; - } - } - - kfree(desc); - return 0; -} - -static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num, - void *data) -{ -#define HCLGE_64_BIT_REG_RTN_DATANUM 4 -#define HCLGE_64_BIT_DESC_NODATA_LEN 1 - - struct hclge_desc *desc; - u64 *reg_val = data; - __le64 *desc_data; - int nodata_len; - int cmd_num; - int i, k, n; - int ret; - - if (regs_num == 0) - return 0; - - nodata_len = HCLGE_64_BIT_DESC_NODATA_LEN; - cmd_num = DIV_ROUND_UP(regs_num + nodata_len, - HCLGE_64_BIT_REG_RTN_DATANUM); - desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL); - if (!desc) - return -ENOMEM; - - hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_64_BIT_REG, true); - ret = hclge_cmd_send(&hdev->hw, desc, cmd_num); - if (ret) { - dev_err(&hdev->pdev->dev, - "Query 64 bit register cmd failed, ret = %d.\n", ret); - kfree(desc); - return ret; - } - - for (i = 0; i < cmd_num; i++) { - if (i == 0) { - desc_data = (__le64 *)(&desc[i].data[0]); - n = HCLGE_64_BIT_REG_RTN_DATANUM - nodata_len; - } else { - desc_data = (__le64 *)(&desc[i]); - n = HCLGE_64_BIT_REG_RTN_DATANUM; - } - for (k = 0; k < n; k++) { - *reg_val++ = le64_to_cpu(*desc_data++); - - regs_num--; - if (!regs_num) - break; - } - } - - kfree(desc); - return 0; -} - -#define MAX_SEPARATE_NUM 4 -#define SEPARATOR_VALUE 0xFDFCFBFA -#define REG_NUM_PER_LINE 4 -#define REG_LEN_PER_LINE (REG_NUM_PER_LINE * sizeof(u32)) -#define REG_SEPARATOR_LINE 1 -#define REG_NUM_REMAIN_MASK 3 - -int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc) -{ - int i; - - /* initialize command BD except the last one */ - for (i = 0; i < HCLGE_GET_DFX_REG_TYPE_CNT - 1; i++) { - hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_DFX_BD_NUM, - true); - desc[i].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); - } - - /* initialize the last command BD */ - hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_DFX_BD_NUM, true); - - return hclge_cmd_send(&hdev->hw, desc, HCLGE_GET_DFX_REG_TYPE_CNT); -} - -static int hclge_get_dfx_reg_bd_num(struct hclge_dev *hdev, - int *bd_num_list, - u32 type_num) -{ - u32 entries_per_desc, desc_index, index, offset, i; - struct hclge_desc desc[HCLGE_GET_DFX_REG_TYPE_CNT]; - int ret; - - ret = hclge_query_bd_num_cmd_send(hdev, desc); - if (ret) { - dev_err(&hdev->pdev->dev, - "Get dfx bd num fail, status is %d.\n", ret); - return ret; - } - - entries_per_desc = ARRAY_SIZE(desc[0].data); - for (i = 0; i < type_num; i++) { - offset = hclge_dfx_bd_offset_list[i]; - index = offset % entries_per_desc; - desc_index = offset / entries_per_desc; - bd_num_list[i] = le32_to_cpu(desc[desc_index].data[index]); - } - - return ret; -} - -static int hclge_dfx_reg_cmd_send(struct hclge_dev *hdev, - struct hclge_desc *desc_src, int bd_num, - enum hclge_opcode_type cmd) -{ - struct hclge_desc *desc = desc_src; - int i, ret; - - hclge_cmd_setup_basic_desc(desc, cmd, true); - for (i = 0; i < bd_num - 1; i++) { - desc->flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); - desc++; - hclge_cmd_setup_basic_desc(desc, cmd, true); - } - - desc = desc_src; - ret = hclge_cmd_send(&hdev->hw, desc, bd_num); - if (ret) - dev_err(&hdev->pdev->dev, - "Query dfx reg cmd(0x%x) send fail, status is %d.\n", - cmd, ret); - - return ret; -} - -static int hclge_dfx_reg_fetch_data(struct hclge_desc *desc_src, int bd_num, - void *data) -{ - int entries_per_desc, reg_num, separator_num, desc_index, index, i; - struct hclge_desc *desc = desc_src; - u32 *reg = data; - - entries_per_desc = ARRAY_SIZE(desc->data); - reg_num = entries_per_desc * bd_num; - separator_num = REG_NUM_PER_LINE - (reg_num & REG_NUM_REMAIN_MASK); - for (i = 0; i < reg_num; i++) { - index = i % entries_per_desc; - desc_index = i / entries_per_desc; - *reg++ = le32_to_cpu(desc[desc_index].data[index]); - } - for (i = 0; i < separator_num; i++) - *reg++ = SEPARATOR_VALUE; - - return reg_num + separator_num; -} - -static int hclge_get_dfx_reg_len(struct hclge_dev *hdev, int *len) -{ - u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list); - int data_len_per_desc, bd_num, i; - int *bd_num_list; - u32 data_len; - int ret; - - bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL); - if (!bd_num_list) - return -ENOMEM; - - ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num); - if (ret) { - dev_err(&hdev->pdev->dev, - "Get dfx reg bd num fail, status is %d.\n", ret); - goto out; - } - - data_len_per_desc = sizeof_field(struct hclge_desc, data); - *len = 0; - for (i = 0; i < dfx_reg_type_num; i++) { - bd_num = bd_num_list[i]; - data_len = data_len_per_desc * bd_num; - *len += (data_len / REG_LEN_PER_LINE + 1) * REG_LEN_PER_LINE; - } - -out: - kfree(bd_num_list); - return ret; -} - -static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data) -{ - u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list); - int bd_num, bd_num_max, buf_len, i; - struct hclge_desc *desc_src; - int *bd_num_list; - u32 *reg = data; - int ret; - - bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL); - if (!bd_num_list) - return -ENOMEM; - - ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num); - if (ret) { - dev_err(&hdev->pdev->dev, - "Get dfx reg bd num fail, status is %d.\n", ret); - goto out; - } - - bd_num_max = bd_num_list[0]; - for (i = 1; i < dfx_reg_type_num; i++) - bd_num_max = max_t(int, bd_num_max, bd_num_list[i]); - - buf_len = sizeof(*desc_src) * bd_num_max; - desc_src = kzalloc(buf_len, GFP_KERNEL); - if (!desc_src) { - ret = -ENOMEM; - goto out; - } - - for (i = 0; i < dfx_reg_type_num; i++) { - bd_num = bd_num_list[i]; - ret = hclge_dfx_reg_cmd_send(hdev, desc_src, bd_num, - hclge_dfx_reg_opcode_list[i]); - if (ret) { - dev_err(&hdev->pdev->dev, - "Get dfx reg fail, status is %d.\n", ret); - break; - } - - reg += hclge_dfx_reg_fetch_data(desc_src, bd_num, reg); - } - - kfree(desc_src); -out: - kfree(bd_num_list); - return ret; -} - -static int hclge_fetch_pf_reg(struct hclge_dev *hdev, void *data, - struct hnae3_knic_private_info *kinfo) -{ -#define HCLGE_RING_REG_OFFSET 0x200 -#define HCLGE_RING_INT_REG_OFFSET 0x4 - - int i, j, reg_num, separator_num; - int data_num_sum; - u32 *reg = data; - - /* fetching per-PF registers valus from PF PCIe register space */ - reg_num = ARRAY_SIZE(cmdq_reg_addr_list); - separator_num = MAX_SEPARATE_NUM - (reg_num & REG_NUM_REMAIN_MASK); - for (i = 0; i < reg_num; i++) - *reg++ = hclge_read_dev(&hdev->hw, cmdq_reg_addr_list[i]); - for (i = 0; i < separator_num; i++) - *reg++ = SEPARATOR_VALUE; - data_num_sum = reg_num + separator_num; - - reg_num = ARRAY_SIZE(common_reg_addr_list); - separator_num = MAX_SEPARATE_NUM - (reg_num & REG_NUM_REMAIN_MASK); - for (i = 0; i < reg_num; i++) - *reg++ = hclge_read_dev(&hdev->hw, common_reg_addr_list[i]); - for (i = 0; i < separator_num; i++) - *reg++ = SEPARATOR_VALUE; - data_num_sum += reg_num + separator_num; - - reg_num = ARRAY_SIZE(ring_reg_addr_list); - separator_num = MAX_SEPARATE_NUM - (reg_num & REG_NUM_REMAIN_MASK); - for (j = 0; j < kinfo->num_tqps; j++) { - for (i = 0; i < reg_num; i++) - *reg++ = hclge_read_dev(&hdev->hw, - ring_reg_addr_list[i] + - HCLGE_RING_REG_OFFSET * j); - for (i = 0; i < separator_num; i++) - *reg++ = SEPARATOR_VALUE; - } - data_num_sum += (reg_num + separator_num) * kinfo->num_tqps; - - reg_num = ARRAY_SIZE(tqp_intr_reg_addr_list); - separator_num = MAX_SEPARATE_NUM - (reg_num & REG_NUM_REMAIN_MASK); - for (j = 0; j < hdev->num_msi_used - 1; j++) { - for (i = 0; i < reg_num; i++) - *reg++ = hclge_read_dev(&hdev->hw, - tqp_intr_reg_addr_list[i] + - HCLGE_RING_INT_REG_OFFSET * j); - for (i = 0; i < separator_num; i++) - *reg++ = SEPARATOR_VALUE; - } - data_num_sum += (reg_num + separator_num) * (hdev->num_msi_used - 1); - - return data_num_sum; -} - -static int hclge_get_regs_len(struct hnae3_handle *handle) -{ - int cmdq_lines, common_lines, ring_lines, tqp_intr_lines; - struct hnae3_knic_private_info *kinfo = &handle->kinfo; - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; - int regs_num_32_bit, regs_num_64_bit, dfx_regs_len; - int regs_lines_32_bit, regs_lines_64_bit; - int ret; - - ret = hclge_get_regs_num(hdev, ®s_num_32_bit, ®s_num_64_bit); - if (ret) { - dev_err(&hdev->pdev->dev, - "Get register number failed, ret = %d.\n", ret); - return ret; - } - - ret = hclge_get_dfx_reg_len(hdev, &dfx_regs_len); - if (ret) { - dev_err(&hdev->pdev->dev, - "Get dfx reg len failed, ret = %d.\n", ret); - return ret; - } - - cmdq_lines = sizeof(cmdq_reg_addr_list) / REG_LEN_PER_LINE + - REG_SEPARATOR_LINE; - common_lines = sizeof(common_reg_addr_list) / REG_LEN_PER_LINE + - REG_SEPARATOR_LINE; - ring_lines = sizeof(ring_reg_addr_list) / REG_LEN_PER_LINE + - REG_SEPARATOR_LINE; - tqp_intr_lines = sizeof(tqp_intr_reg_addr_list) / REG_LEN_PER_LINE + - REG_SEPARATOR_LINE; - regs_lines_32_bit = regs_num_32_bit * sizeof(u32) / REG_LEN_PER_LINE + - REG_SEPARATOR_LINE; - regs_lines_64_bit = regs_num_64_bit * sizeof(u64) / REG_LEN_PER_LINE + - REG_SEPARATOR_LINE; - - return (cmdq_lines + common_lines + ring_lines * kinfo->num_tqps + - tqp_intr_lines * (hdev->num_msi_used - 1) + regs_lines_32_bit + - regs_lines_64_bit) * REG_LEN_PER_LINE + dfx_regs_len; -} - -static void hclge_get_regs(struct hnae3_handle *handle, u32 *version, - void *data) -{ - struct hnae3_knic_private_info *kinfo = &handle->kinfo; - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; - u32 regs_num_32_bit, regs_num_64_bit; - int i, reg_num, separator_num, ret; - u32 *reg = data; - - *version = hdev->fw_version; - - ret = hclge_get_regs_num(hdev, ®s_num_32_bit, ®s_num_64_bit); - if (ret) { - dev_err(&hdev->pdev->dev, - "Get register number failed, ret = %d.\n", ret); - return; - } - - reg += hclge_fetch_pf_reg(hdev, reg, kinfo); - - ret = hclge_get_32_bit_regs(hdev, regs_num_32_bit, reg); - if (ret) { - dev_err(&hdev->pdev->dev, - "Get 32 bit register failed, ret = %d.\n", ret); - return; - } - reg_num = regs_num_32_bit; - reg += reg_num; - separator_num = MAX_SEPARATE_NUM - (reg_num & REG_NUM_REMAIN_MASK); - for (i = 0; i < separator_num; i++) - *reg++ = SEPARATOR_VALUE; - - ret = hclge_get_64_bit_regs(hdev, regs_num_64_bit, reg); - if (ret) { - dev_err(&hdev->pdev->dev, - "Get 64 bit register failed, ret = %d.\n", ret); - return; - } - reg_num = regs_num_64_bit * 2; - reg += reg_num; - separator_num = MAX_SEPARATE_NUM - (reg_num & REG_NUM_REMAIN_MASK); - for (i = 0; i < separator_num; i++) - *reg++ = SEPARATOR_VALUE; - - ret = hclge_get_dfx_reg(hdev, reg); - if (ret) - dev_err(&hdev->pdev->dev, - "Get dfx register failed, ret = %d.\n", ret); -} - static int hclge_set_led_status(struct hclge_dev *hdev, u8 locate_led_status) { struct hclge_set_led_state_cmd *req; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 81aa6b0facf5..70319ce49a1d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -835,15 +835,10 @@ struct hclge_vf_vlan_cfg { * Then for input key(k) and mask(v), we can calculate the value by * the formulae: * x = (~k) & v - * y = (k ^ ~v) & k + * y = k & v */ -#define calc_x(x, k, v) (x = ~(k) & (v)) -#define calc_y(y, k, v) \ - do { \ - const typeof(k) _k_ = (k); \ - const typeof(v) _v_ = (v); \ - (y) = (_k_ ^ ~_v_) & (_k_); \ - } while (0) +#define calc_x(x, k, v) ((x) = ~(k) & (v)) +#define calc_y(y, k, v) ((y) = (k) & (v)) #define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f)) #define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset))) @@ -924,8 +919,6 @@ struct hclge_dev { #define HCLGE_FLAG_MAIN BIT(0) #define HCLGE_FLAG_DCB_CAPABLE BIT(1) -#define HCLGE_FLAG_DCB_ENABLE BIT(2) -#define HCLGE_FLAG_MQPRIO_ENABLE BIT(3) u32 flag; u32 pkt_buf_size; /* Total pf buf size for tx/rx */ @@ -1147,8 +1140,6 @@ int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid, u16 state, struct hclge_vlan_info *vlan_info); void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time); -int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, - struct hclge_desc *desc); void hclge_report_hw_error(struct hclge_dev *hdev, enum hnae3_hw_error_type type); void hclge_inform_vf_promisc_info(struct hclge_vport *vport); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.c new file mode 100644 index 000000000000..43c1c18fa81f --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.c @@ -0,0 +1,668 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (c) 2023 Hisilicon Limited. + +#include "hclge_cmd.h" +#include "hclge_main.h" +#include "hclge_regs.h" +#include "hnae3.h" + +static const u32 cmdq_reg_addr_list[] = {HCLGE_COMM_NIC_CSQ_BASEADDR_L_REG, + HCLGE_COMM_NIC_CSQ_BASEADDR_H_REG, + HCLGE_COMM_NIC_CSQ_DEPTH_REG, + HCLGE_COMM_NIC_CSQ_TAIL_REG, + HCLGE_COMM_NIC_CSQ_HEAD_REG, + HCLGE_COMM_NIC_CRQ_BASEADDR_L_REG, + HCLGE_COMM_NIC_CRQ_BASEADDR_H_REG, + HCLGE_COMM_NIC_CRQ_DEPTH_REG, + HCLGE_COMM_NIC_CRQ_TAIL_REG, + HCLGE_COMM_NIC_CRQ_HEAD_REG, + HCLGE_COMM_VECTOR0_CMDQ_SRC_REG, + HCLGE_COMM_CMDQ_INTR_STS_REG, + HCLGE_COMM_CMDQ_INTR_EN_REG, + HCLGE_COMM_CMDQ_INTR_GEN_REG}; + +static const u32 common_reg_addr_list[] = {HCLGE_MISC_VECTOR_REG_BASE, + HCLGE_PF_OTHER_INT_REG, + HCLGE_MISC_RESET_STS_REG, + HCLGE_MISC_VECTOR_INT_STS, + HCLGE_GLOBAL_RESET_REG, + HCLGE_FUN_RST_ING, + HCLGE_GRO_EN_REG}; + +static const u32 ring_reg_addr_list[] = {HCLGE_RING_RX_ADDR_L_REG, + HCLGE_RING_RX_ADDR_H_REG, + HCLGE_RING_RX_BD_NUM_REG, + HCLGE_RING_RX_BD_LENGTH_REG, + HCLGE_RING_RX_MERGE_EN_REG, + HCLGE_RING_RX_TAIL_REG, + HCLGE_RING_RX_HEAD_REG, + HCLGE_RING_RX_FBD_NUM_REG, + HCLGE_RING_RX_OFFSET_REG, + HCLGE_RING_RX_FBD_OFFSET_REG, + HCLGE_RING_RX_STASH_REG, + HCLGE_RING_RX_BD_ERR_REG, + HCLGE_RING_TX_ADDR_L_REG, + HCLGE_RING_TX_ADDR_H_REG, + HCLGE_RING_TX_BD_NUM_REG, + HCLGE_RING_TX_PRIORITY_REG, + HCLGE_RING_TX_TC_REG, + HCLGE_RING_TX_MERGE_EN_REG, + HCLGE_RING_TX_TAIL_REG, + HCLGE_RING_TX_HEAD_REG, + HCLGE_RING_TX_FBD_NUM_REG, + HCLGE_RING_TX_OFFSET_REG, + HCLGE_RING_TX_EBD_NUM_REG, + HCLGE_RING_TX_EBD_OFFSET_REG, + HCLGE_RING_TX_BD_ERR_REG, + HCLGE_RING_EN_REG}; + +static const u32 tqp_intr_reg_addr_list[] = {HCLGE_TQP_INTR_CTRL_REG, + HCLGE_TQP_INTR_GL0_REG, + HCLGE_TQP_INTR_GL1_REG, + HCLGE_TQP_INTR_GL2_REG, + HCLGE_TQP_INTR_RL_REG}; + +/* Get DFX BD number offset */ +#define HCLGE_DFX_BIOS_BD_OFFSET 1 +#define HCLGE_DFX_SSU_0_BD_OFFSET 2 +#define HCLGE_DFX_SSU_1_BD_OFFSET 3 +#define HCLGE_DFX_IGU_BD_OFFSET 4 +#define HCLGE_DFX_RPU_0_BD_OFFSET 5 +#define HCLGE_DFX_RPU_1_BD_OFFSET 6 +#define HCLGE_DFX_NCSI_BD_OFFSET 7 +#define HCLGE_DFX_RTC_BD_OFFSET 8 +#define HCLGE_DFX_PPP_BD_OFFSET 9 +#define HCLGE_DFX_RCB_BD_OFFSET 10 +#define HCLGE_DFX_TQP_BD_OFFSET 11 +#define HCLGE_DFX_SSU_2_BD_OFFSET 12 + +static const u32 hclge_dfx_bd_offset_list[] = { + HCLGE_DFX_BIOS_BD_OFFSET, + HCLGE_DFX_SSU_0_BD_OFFSET, + HCLGE_DFX_SSU_1_BD_OFFSET, + HCLGE_DFX_IGU_BD_OFFSET, + HCLGE_DFX_RPU_0_BD_OFFSET, + HCLGE_DFX_RPU_1_BD_OFFSET, + HCLGE_DFX_NCSI_BD_OFFSET, + HCLGE_DFX_RTC_BD_OFFSET, + HCLGE_DFX_PPP_BD_OFFSET, + HCLGE_DFX_RCB_BD_OFFSET, + HCLGE_DFX_TQP_BD_OFFSET, + HCLGE_DFX_SSU_2_BD_OFFSET +}; + +static const enum hclge_opcode_type hclge_dfx_reg_opcode_list[] = { + HCLGE_OPC_DFX_BIOS_COMMON_REG, + HCLGE_OPC_DFX_SSU_REG_0, + HCLGE_OPC_DFX_SSU_REG_1, + HCLGE_OPC_DFX_IGU_EGU_REG, + HCLGE_OPC_DFX_RPU_REG_0, + HCLGE_OPC_DFX_RPU_REG_1, + HCLGE_OPC_DFX_NCSI_REG, + HCLGE_OPC_DFX_RTC_REG, + HCLGE_OPC_DFX_PPP_REG, + HCLGE_OPC_DFX_RCB_REG, + HCLGE_OPC_DFX_TQP_REG, + HCLGE_OPC_DFX_SSU_REG_2 +}; + +enum hclge_reg_tag { + HCLGE_REG_TAG_CMDQ = 0, + HCLGE_REG_TAG_COMMON, + HCLGE_REG_TAG_RING, + HCLGE_REG_TAG_TQP_INTR, + HCLGE_REG_TAG_QUERY_32_BIT, + HCLGE_REG_TAG_QUERY_64_BIT, + HCLGE_REG_TAG_DFX_BIOS_COMMON, + HCLGE_REG_TAG_DFX_SSU_0, + HCLGE_REG_TAG_DFX_SSU_1, + HCLGE_REG_TAG_DFX_IGU_EGU, + HCLGE_REG_TAG_DFX_RPU_0, + HCLGE_REG_TAG_DFX_RPU_1, + HCLGE_REG_TAG_DFX_NCSI, + HCLGE_REG_TAG_DFX_RTC, + HCLGE_REG_TAG_DFX_PPP, + HCLGE_REG_TAG_DFX_RCB, + HCLGE_REG_TAG_DFX_TQP, + HCLGE_REG_TAG_DFX_SSU_2, + HCLGE_REG_TAG_RPU_TNL, +}; + +#pragma pack(4) +struct hclge_reg_tlv { + u16 tag; + u16 len; +}; + +struct hclge_reg_header { + u64 magic_number; + u8 is_vf; + u8 rsv[7]; +}; + +#pragma pack() + +#define HCLGE_REG_TLV_SIZE sizeof(struct hclge_reg_tlv) +#define HCLGE_REG_HEADER_SIZE sizeof(struct hclge_reg_header) +#define HCLGE_REG_TLV_SPACE (sizeof(struct hclge_reg_tlv) / sizeof(u32)) +#define HCLGE_REG_HEADER_SPACE (sizeof(struct hclge_reg_header) / sizeof(u32)) +#define HCLGE_REG_MAGIC_NUMBER 0x686e733372656773 /* meaning is hns3regs */ + +#define HCLGE_REG_RPU_TNL_ID_0 1 + +static u32 hclge_reg_get_header(void *data) +{ + struct hclge_reg_header *header = data; + + header->magic_number = HCLGE_REG_MAGIC_NUMBER; + header->is_vf = 0x0; + + return HCLGE_REG_HEADER_SPACE; +} + +static u32 hclge_reg_get_tlv(u32 tag, u32 regs_num, void *data) +{ + struct hclge_reg_tlv *tlv = data; + + tlv->tag = tag; + tlv->len = regs_num * sizeof(u32) + HCLGE_REG_TLV_SIZE; + + return HCLGE_REG_TLV_SPACE; +} + +static int hclge_get_32_bit_regs(struct hclge_dev *hdev, u32 regs_num, + void *data) +{ +#define HCLGE_32_BIT_REG_RTN_DATANUM 8 +#define HCLGE_32_BIT_DESC_NODATA_LEN 2 + + struct hclge_desc *desc; + u32 *reg_val = data; + __le32 *desc_data; + int nodata_num; + int cmd_num; + int i, k, n; + int ret; + + if (regs_num == 0) + return 0; + + nodata_num = HCLGE_32_BIT_DESC_NODATA_LEN; + cmd_num = DIV_ROUND_UP(regs_num + nodata_num, + HCLGE_32_BIT_REG_RTN_DATANUM); + desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_32_BIT_REG, true); + ret = hclge_cmd_send(&hdev->hw, desc, cmd_num); + if (ret) { + dev_err(&hdev->pdev->dev, + "Query 32 bit register cmd failed, ret = %d.\n", ret); + kfree(desc); + return ret; + } + + for (i = 0; i < cmd_num; i++) { + if (i == 0) { + desc_data = (__le32 *)(&desc[i].data[0]); + n = HCLGE_32_BIT_REG_RTN_DATANUM - nodata_num; + } else { + desc_data = (__le32 *)(&desc[i]); + n = HCLGE_32_BIT_REG_RTN_DATANUM; + } + for (k = 0; k < n; k++) { + *reg_val++ = le32_to_cpu(*desc_data++); + + regs_num--; + if (!regs_num) + break; + } + } + + kfree(desc); + return 0; +} + +static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num, + void *data) +{ +#define HCLGE_64_BIT_REG_RTN_DATANUM 4 +#define HCLGE_64_BIT_DESC_NODATA_LEN 1 + + struct hclge_desc *desc; + u64 *reg_val = data; + __le64 *desc_data; + int nodata_len; + int cmd_num; + int i, k, n; + int ret; + + if (regs_num == 0) + return 0; + + nodata_len = HCLGE_64_BIT_DESC_NODATA_LEN; + cmd_num = DIV_ROUND_UP(regs_num + nodata_len, + HCLGE_64_BIT_REG_RTN_DATANUM); + desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_64_BIT_REG, true); + ret = hclge_cmd_send(&hdev->hw, desc, cmd_num); + if (ret) { + dev_err(&hdev->pdev->dev, + "Query 64 bit register cmd failed, ret = %d.\n", ret); + kfree(desc); + return ret; + } + + for (i = 0; i < cmd_num; i++) { + if (i == 0) { + desc_data = (__le64 *)(&desc[i].data[0]); + n = HCLGE_64_BIT_REG_RTN_DATANUM - nodata_len; + } else { + desc_data = (__le64 *)(&desc[i]); + n = HCLGE_64_BIT_REG_RTN_DATANUM; + } + for (k = 0; k < n; k++) { + *reg_val++ = le64_to_cpu(*desc_data++); + + regs_num--; + if (!regs_num) + break; + } + } + + kfree(desc); + return 0; +} + +int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc) +{ + int i; + + /* initialize command BD except the last one */ + for (i = 0; i < HCLGE_GET_DFX_REG_TYPE_CNT - 1; i++) { + hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_DFX_BD_NUM, + true); + desc[i].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); + } + + /* initialize the last command BD */ + hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_DFX_BD_NUM, true); + + return hclge_cmd_send(&hdev->hw, desc, HCLGE_GET_DFX_REG_TYPE_CNT); +} + +static int hclge_get_dfx_reg_bd_num(struct hclge_dev *hdev, + int *bd_num_list, + u32 type_num) +{ + u32 entries_per_desc, desc_index, index, offset, i; + struct hclge_desc desc[HCLGE_GET_DFX_REG_TYPE_CNT]; + int ret; + + ret = hclge_query_bd_num_cmd_send(hdev, desc); + if (ret) { + dev_err(&hdev->pdev->dev, + "Get dfx bd num fail, status is %d.\n", ret); + return ret; + } + + entries_per_desc = ARRAY_SIZE(desc[0].data); + for (i = 0; i < type_num; i++) { + offset = hclge_dfx_bd_offset_list[i]; + index = offset % entries_per_desc; + desc_index = offset / entries_per_desc; + bd_num_list[i] = le32_to_cpu(desc[desc_index].data[index]); + } + + return ret; +} + +static int hclge_dfx_reg_cmd_send(struct hclge_dev *hdev, + struct hclge_desc *desc_src, int bd_num, + enum hclge_opcode_type cmd) +{ + struct hclge_desc *desc = desc_src; + int i, ret; + + hclge_cmd_setup_basic_desc(desc, cmd, true); + for (i = 0; i < bd_num - 1; i++) { + desc->flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); + desc++; + hclge_cmd_setup_basic_desc(desc, cmd, true); + } + + desc = desc_src; + ret = hclge_cmd_send(&hdev->hw, desc, bd_num); + if (ret) + dev_err(&hdev->pdev->dev, + "Query dfx reg cmd(0x%x) send fail, status is %d.\n", + cmd, ret); + + return ret; +} + +/* tnl_id = 0 means get sum of all tnl reg's value */ +static int hclge_dfx_reg_rpu_tnl_cmd_send(struct hclge_dev *hdev, u32 tnl_id, + struct hclge_desc *desc, int bd_num) +{ + int i, ret; + + for (i = 0; i < bd_num; i++) { + hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_DFX_RPU_REG_0, + true); + if (i != bd_num - 1) + desc[i].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); + } + + desc[0].data[0] = cpu_to_le32(tnl_id); + ret = hclge_cmd_send(&hdev->hw, desc, bd_num); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to query dfx rpu tnl reg, ret = %d\n", + ret); + return ret; +} + +static int hclge_dfx_reg_fetch_data(struct hclge_desc *desc_src, int bd_num, + void *data) +{ + int entries_per_desc, reg_num, desc_index, index, i; + struct hclge_desc *desc = desc_src; + u32 *reg = data; + + entries_per_desc = ARRAY_SIZE(desc->data); + reg_num = entries_per_desc * bd_num; + for (i = 0; i < reg_num; i++) { + index = i % entries_per_desc; + desc_index = i / entries_per_desc; + *reg++ = le32_to_cpu(desc[desc_index].data[index]); + } + + return reg_num; +} + +static int hclge_get_dfx_reg_len(struct hclge_dev *hdev, int *len) +{ + u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + int data_len_per_desc; + int *bd_num_list; + int ret; + u32 i; + + bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL); + if (!bd_num_list) + return -ENOMEM; + + ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num); + if (ret) { + dev_err(&hdev->pdev->dev, + "Get dfx reg bd num fail, status is %d.\n", ret); + goto out; + } + + data_len_per_desc = sizeof_field(struct hclge_desc, data); + *len = 0; + for (i = 0; i < dfx_reg_type_num; i++) + *len += bd_num_list[i] * data_len_per_desc + HCLGE_REG_TLV_SIZE; + + /** + * the num of dfx_rpu_0 is reused by each dfx_rpu_tnl + * HCLGE_DFX_BD_OFFSET is starting at 1, but the array subscript is + * starting at 0, so offset need '- 1'. + */ + *len += (bd_num_list[HCLGE_DFX_RPU_0_BD_OFFSET - 1] * data_len_per_desc + + HCLGE_REG_TLV_SIZE) * ae_dev->dev_specs.tnl_num; + +out: + kfree(bd_num_list); + return ret; +} + +static int hclge_get_dfx_rpu_tnl_reg(struct hclge_dev *hdev, u32 *reg, + struct hclge_desc *desc_src, + int bd_num) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + int ret = 0; + u8 i; + + for (i = HCLGE_REG_RPU_TNL_ID_0; i <= ae_dev->dev_specs.tnl_num; i++) { + ret = hclge_dfx_reg_rpu_tnl_cmd_send(hdev, i, desc_src, bd_num); + if (ret) + break; + + reg += hclge_reg_get_tlv(HCLGE_REG_TAG_RPU_TNL, + ARRAY_SIZE(desc_src->data) * bd_num, + reg); + reg += hclge_dfx_reg_fetch_data(desc_src, bd_num, reg); + } + + return ret; +} + +static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data) +{ + u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list); + int bd_num, bd_num_max, buf_len; + struct hclge_desc *desc_src; + int *bd_num_list; + u32 *reg = data; + int ret; + u32 i; + + bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL); + if (!bd_num_list) + return -ENOMEM; + + ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num); + if (ret) { + dev_err(&hdev->pdev->dev, + "Get dfx reg bd num fail, status is %d.\n", ret); + goto out; + } + + bd_num_max = bd_num_list[0]; + for (i = 1; i < dfx_reg_type_num; i++) + bd_num_max = max_t(int, bd_num_max, bd_num_list[i]); + + buf_len = sizeof(*desc_src) * bd_num_max; + desc_src = kzalloc(buf_len, GFP_KERNEL); + if (!desc_src) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < dfx_reg_type_num; i++) { + bd_num = bd_num_list[i]; + ret = hclge_dfx_reg_cmd_send(hdev, desc_src, bd_num, + hclge_dfx_reg_opcode_list[i]); + if (ret) { + dev_err(&hdev->pdev->dev, + "Get dfx reg fail, status is %d.\n", ret); + goto free; + } + + reg += hclge_reg_get_tlv(HCLGE_REG_TAG_DFX_BIOS_COMMON + i, + ARRAY_SIZE(desc_src->data) * bd_num, + reg); + reg += hclge_dfx_reg_fetch_data(desc_src, bd_num, reg); + } + + /** + * HCLGE_DFX_BD_OFFSET is starting at 1, but the array subscript is + * starting at 0, so offset need '- 1'. + */ + bd_num = bd_num_list[HCLGE_DFX_RPU_0_BD_OFFSET - 1]; + ret = hclge_get_dfx_rpu_tnl_reg(hdev, reg, desc_src, bd_num); + +free: + kfree(desc_src); +out: + kfree(bd_num_list); + return ret; +} + +static int hclge_fetch_pf_reg(struct hclge_dev *hdev, void *data, + struct hnae3_knic_private_info *kinfo) +{ +#define HCLGE_RING_REG_OFFSET 0x200 +#define HCLGE_RING_INT_REG_OFFSET 0x4 + + int i, j, reg_num; + int data_num_sum; + u32 *reg = data; + + /* fetching per-PF registers valus from PF PCIe register space */ + reg_num = ARRAY_SIZE(cmdq_reg_addr_list); + reg += hclge_reg_get_tlv(HCLGE_REG_TAG_CMDQ, reg_num, reg); + for (i = 0; i < reg_num; i++) + *reg++ = hclge_read_dev(&hdev->hw, cmdq_reg_addr_list[i]); + data_num_sum = reg_num + HCLGE_REG_TLV_SPACE; + + reg_num = ARRAY_SIZE(common_reg_addr_list); + reg += hclge_reg_get_tlv(HCLGE_REG_TAG_COMMON, reg_num, reg); + for (i = 0; i < reg_num; i++) + *reg++ = hclge_read_dev(&hdev->hw, common_reg_addr_list[i]); + data_num_sum += reg_num + HCLGE_REG_TLV_SPACE; + + reg_num = ARRAY_SIZE(ring_reg_addr_list); + for (j = 0; j < kinfo->num_tqps; j++) { + reg += hclge_reg_get_tlv(HCLGE_REG_TAG_RING, reg_num, reg); + for (i = 0; i < reg_num; i++) + *reg++ = hclge_read_dev(&hdev->hw, + ring_reg_addr_list[i] + + HCLGE_RING_REG_OFFSET * j); + } + data_num_sum += (reg_num + HCLGE_REG_TLV_SPACE) * kinfo->num_tqps; + + reg_num = ARRAY_SIZE(tqp_intr_reg_addr_list); + for (j = 0; j < hdev->num_msi_used - 1; j++) { + reg += hclge_reg_get_tlv(HCLGE_REG_TAG_TQP_INTR, reg_num, reg); + for (i = 0; i < reg_num; i++) + *reg++ = hclge_read_dev(&hdev->hw, + tqp_intr_reg_addr_list[i] + + HCLGE_RING_INT_REG_OFFSET * j); + } + data_num_sum += (reg_num + HCLGE_REG_TLV_SPACE) * + (hdev->num_msi_used - 1); + + return data_num_sum; +} + +static int hclge_get_regs_num(struct hclge_dev *hdev, u32 *regs_num_32_bit, + u32 *regs_num_64_bit) +{ + struct hclge_desc desc; + u32 total_num; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_REG_NUM, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "Query register number cmd failed, ret = %d.\n", ret); + return ret; + } + + *regs_num_32_bit = le32_to_cpu(desc.data[0]); + *regs_num_64_bit = le32_to_cpu(desc.data[1]); + + total_num = *regs_num_32_bit + *regs_num_64_bit; + if (!total_num) + return -EINVAL; + + return 0; +} + +int hclge_get_regs_len(struct hnae3_handle *handle) +{ + struct hnae3_knic_private_info *kinfo = &handle->kinfo; + struct hclge_vport *vport = hclge_get_vport(handle); + int regs_num_32_bit, regs_num_64_bit, dfx_regs_len; + int cmdq_len, common_len, ring_len, tqp_intr_len; + int regs_len_32_bit, regs_len_64_bit; + struct hclge_dev *hdev = vport->back; + int ret; + + ret = hclge_get_regs_num(hdev, ®s_num_32_bit, ®s_num_64_bit); + if (ret) { + dev_err(&hdev->pdev->dev, + "Get register number failed, ret = %d.\n", ret); + return ret; + } + + ret = hclge_get_dfx_reg_len(hdev, &dfx_regs_len); + if (ret) { + dev_err(&hdev->pdev->dev, + "Get dfx reg len failed, ret = %d.\n", ret); + return ret; + } + + cmdq_len = HCLGE_REG_TLV_SIZE + sizeof(cmdq_reg_addr_list); + common_len = HCLGE_REG_TLV_SIZE + sizeof(common_reg_addr_list); + ring_len = HCLGE_REG_TLV_SIZE + sizeof(ring_reg_addr_list); + tqp_intr_len = HCLGE_REG_TLV_SIZE + sizeof(tqp_intr_reg_addr_list); + regs_len_32_bit = HCLGE_REG_TLV_SIZE + regs_num_32_bit * sizeof(u32); + regs_len_64_bit = HCLGE_REG_TLV_SIZE + regs_num_64_bit * sizeof(u64); + + /* return the total length of all register values */ + return HCLGE_REG_HEADER_SIZE + cmdq_len + common_len + ring_len * + kinfo->num_tqps + tqp_intr_len * (hdev->num_msi_used - 1) + + regs_len_32_bit + regs_len_64_bit + dfx_regs_len; +} + +void hclge_get_regs(struct hnae3_handle *handle, u32 *version, + void *data) +{ +#define HCLGE_REG_64_BIT_SPACE_MULTIPLE 2 + + struct hnae3_knic_private_info *kinfo = &handle->kinfo; + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + u32 regs_num_32_bit, regs_num_64_bit; + u32 *reg = data; + int ret; + + *version = hdev->fw_version; + + ret = hclge_get_regs_num(hdev, ®s_num_32_bit, ®s_num_64_bit); + if (ret) { + dev_err(&hdev->pdev->dev, + "Get register number failed, ret = %d.\n", ret); + return; + } + + reg += hclge_reg_get_header(reg); + reg += hclge_fetch_pf_reg(hdev, reg, kinfo); + + reg += hclge_reg_get_tlv(HCLGE_REG_TAG_QUERY_32_BIT, + regs_num_32_bit, reg); + ret = hclge_get_32_bit_regs(hdev, regs_num_32_bit, reg); + if (ret) { + dev_err(&hdev->pdev->dev, + "Get 32 bit register failed, ret = %d.\n", ret); + return; + } + reg += regs_num_32_bit; + + reg += hclge_reg_get_tlv(HCLGE_REG_TAG_QUERY_64_BIT, + regs_num_64_bit * + HCLGE_REG_64_BIT_SPACE_MULTIPLE, reg); + ret = hclge_get_64_bit_regs(hdev, regs_num_64_bit, reg); + if (ret) { + dev_err(&hdev->pdev->dev, + "Get 64 bit register failed, ret = %d.\n", ret); + return; + } + reg += regs_num_64_bit * HCLGE_REG_64_BIT_SPACE_MULTIPLE; + + ret = hclge_get_dfx_reg(hdev, reg); + if (ret) + dev_err(&hdev->pdev->dev, + "Get dfx register failed, ret = %d.\n", ret); +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.h new file mode 100644 index 000000000000..b6bc1ecb8054 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +// Copyright (c) 2023 Hisilicon Limited. + +#ifndef __HCLGE_REGS_H +#define __HCLGE_REGS_H +#include <linux/types.h> +#include "hclge_comm_cmd.h" + +struct hnae3_handle; +struct hclge_dev; + +int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, + struct hclge_desc *desc); +int hclge_get_regs_len(struct hnae3_handle *handle); +void hclge_get_regs(struct hnae3_handle *handle, u32 *version, + void *data); +#endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 922c0da3660c..c58c31221762 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -785,6 +785,7 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev) static void hclge_tm_pg_info_init(struct hclge_dev *hdev) { #define BW_PERCENT 100 +#define DEFAULT_BW_WEIGHT 1 u8 i; @@ -806,7 +807,7 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev) for (k = 0; k < hdev->tm_info.num_tc; k++) hdev->tm_info.pg_info[i].tc_dwrr[k] = BW_PERCENT; for (; k < HNAE3_MAX_TC; k++) - hdev->tm_info.pg_info[i].tc_dwrr[k] = 0; + hdev->tm_info.pg_info[i].tc_dwrr[k] = DEFAULT_BW_WEIGHT; } } @@ -1484,7 +1485,11 @@ int hclge_tm_schd_setup_hw(struct hclge_dev *hdev) return ret; /* Cfg schd mode for each level schd */ - return hclge_tm_schd_mode_hw(hdev); + ret = hclge_tm_schd_mode_hw(hdev); + if (ret) + return ret; + + return hclge_tm_flush_cfg(hdev, false); } static int hclge_pause_param_setup_hw(struct hclge_dev *hdev) @@ -1548,7 +1553,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc) return 0; } -static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) +int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) { bool tx_en, rx_en; @@ -2113,3 +2118,28 @@ int hclge_tm_get_port_shaper(struct hclge_dev *hdev, return 0; } + +int hclge_tm_flush_cfg(struct hclge_dev *hdev, bool enable) +{ + struct hclge_desc desc; + int ret; + + if (!hnae3_ae_dev_tm_flush_supported(hdev)) + return 0; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_FLUSH, false); + + desc.data[0] = cpu_to_le32(enable ? HCLGE_TM_FLUSH_EN_MSK : 0); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to config tm flush, ret = %d\n", ret); + return ret; + } + + if (enable) + msleep(HCLGE_TM_FLUSH_TIME_MS); + + return ret; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index dd6f1fd486cf..53eec6df5194 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -33,6 +33,9 @@ enum hclge_opcode_type; #define HCLGE_DSCP_MAP_TC_BD_NUM 2 #define HCLGE_DSCP_TC_SHIFT(n) (((n) & 1) * 4) +#define HCLGE_TM_FLUSH_TIME_MS 10 +#define HCLGE_TM_FLUSH_EN_MSK BIT(0) + struct hclge_pg_to_pri_link_cmd { u8 pg_id; u8 rsvd1[3]; @@ -242,6 +245,7 @@ int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap, u8 pfc_bitmap); int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx); int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr); +int hclge_mac_pause_setup_hw(struct hclge_dev *hdev); void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats); void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats); int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate); @@ -272,4 +276,5 @@ int hclge_tm_get_port_shaper(struct hclge_dev *hdev, struct hclge_tm_shaper_para *para); int hclge_up_to_tc_map(struct hclge_dev *hdev); int hclge_dscp_to_tc_map(struct hclge_dev *hdev); +int hclge_tm_flush_cfg(struct hclge_dev *hdev, bool enable); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index dd08989a4c7c..7a2f9233d695 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -6,6 +6,7 @@ #include <net/rtnetlink.h> #include "hclgevf_cmd.h" #include "hclgevf_main.h" +#include "hclgevf_regs.h" #include "hclge_mbx.h" #include "hnae3.h" #include "hclgevf_devlink.h" @@ -33,58 +34,6 @@ static const struct pci_device_id ae_algovf_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, ae_algovf_pci_tbl); -static const u32 cmdq_reg_addr_list[] = {HCLGE_COMM_NIC_CSQ_BASEADDR_L_REG, - HCLGE_COMM_NIC_CSQ_BASEADDR_H_REG, - HCLGE_COMM_NIC_CSQ_DEPTH_REG, - HCLGE_COMM_NIC_CSQ_TAIL_REG, - HCLGE_COMM_NIC_CSQ_HEAD_REG, - HCLGE_COMM_NIC_CRQ_BASEADDR_L_REG, - HCLGE_COMM_NIC_CRQ_BASEADDR_H_REG, - HCLGE_COMM_NIC_CRQ_DEPTH_REG, - HCLGE_COMM_NIC_CRQ_TAIL_REG, - HCLGE_COMM_NIC_CRQ_HEAD_REG, - HCLGE_COMM_VECTOR0_CMDQ_SRC_REG, - HCLGE_COMM_VECTOR0_CMDQ_STATE_REG, - HCLGE_COMM_CMDQ_INTR_EN_REG, - HCLGE_COMM_CMDQ_INTR_GEN_REG}; - -static const u32 common_reg_addr_list[] = {HCLGEVF_MISC_VECTOR_REG_BASE, - HCLGEVF_RST_ING, - HCLGEVF_GRO_EN_REG}; - -static const u32 ring_reg_addr_list[] = {HCLGEVF_RING_RX_ADDR_L_REG, - HCLGEVF_RING_RX_ADDR_H_REG, - HCLGEVF_RING_RX_BD_NUM_REG, - HCLGEVF_RING_RX_BD_LENGTH_REG, - HCLGEVF_RING_RX_MERGE_EN_REG, - HCLGEVF_RING_RX_TAIL_REG, - HCLGEVF_RING_RX_HEAD_REG, - HCLGEVF_RING_RX_FBD_NUM_REG, - HCLGEVF_RING_RX_OFFSET_REG, - HCLGEVF_RING_RX_FBD_OFFSET_REG, - HCLGEVF_RING_RX_STASH_REG, - HCLGEVF_RING_RX_BD_ERR_REG, - HCLGEVF_RING_TX_ADDR_L_REG, - HCLGEVF_RING_TX_ADDR_H_REG, - HCLGEVF_RING_TX_BD_NUM_REG, - HCLGEVF_RING_TX_PRIORITY_REG, - HCLGEVF_RING_TX_TC_REG, - HCLGEVF_RING_TX_MERGE_EN_REG, - HCLGEVF_RING_TX_TAIL_REG, - HCLGEVF_RING_TX_HEAD_REG, - HCLGEVF_RING_TX_FBD_NUM_REG, - HCLGEVF_RING_TX_OFFSET_REG, - HCLGEVF_RING_TX_EBD_NUM_REG, - HCLGEVF_RING_TX_EBD_OFFSET_REG, - HCLGEVF_RING_TX_BD_ERR_REG, - HCLGEVF_RING_EN_REG}; - -static const u32 tqp_intr_reg_addr_list[] = {HCLGEVF_TQP_INTR_CTRL_REG, - HCLGEVF_TQP_INTR_GL0_REG, - HCLGEVF_TQP_INTR_GL1_REG, - HCLGEVF_TQP_INTR_GL2_REG, - HCLGEVF_TQP_INTR_RL_REG}; - /* hclgevf_cmd_send - send command to command queue * @hw: pointer to the hw struct * @desc: prefilled descriptor for describing the command @@ -111,7 +60,7 @@ void hclgevf_arq_init(struct hclgevf_dev *hdev) spin_unlock(&cmdq->crq.lock); } -static struct hclgevf_dev *hclgevf_ae_get_hdev(struct hnae3_handle *handle) +struct hclgevf_dev *hclgevf_ae_get_hdev(struct hnae3_handle *handle) { if (!handle->client) return container_of(handle, struct hclgevf_dev, nic); @@ -121,8 +70,7 @@ static struct hclgevf_dev *hclgevf_ae_get_hdev(struct hnae3_handle *handle) return container_of(handle, struct hclgevf_dev, nic); } -static void hclgevf_update_stats(struct hnae3_handle *handle, - struct net_device_stats *net_stats) +static void hclgevf_update_stats(struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); int status; @@ -1645,8 +1593,7 @@ err_reset: hclgevf_reset_err_handle(hdev); } -static enum hnae3_reset_type hclgevf_get_reset_level(struct hclgevf_dev *hdev, - unsigned long *addr) +static enum hnae3_reset_type hclgevf_get_reset_level(unsigned long *addr) { enum hnae3_reset_type rst_level = HNAE3_NONE_RESET; @@ -1685,8 +1632,7 @@ static void hclgevf_reset_event(struct pci_dev *pdev, if (hdev->default_reset_request) hdev->reset_level = - hclgevf_get_reset_level(hdev, - &hdev->default_reset_request); + hclgevf_get_reset_level(&hdev->default_reset_request); else hdev->reset_level = HNAE3_VF_FUNC_RESET; @@ -1828,7 +1774,7 @@ static void hclgevf_reset_service_task(struct hclgevf_dev *hdev) hdev->last_reset_time = jiffies; hdev->reset_type = - hclgevf_get_reset_level(hdev, &hdev->reset_pending); + hclgevf_get_reset_level(&hdev->reset_pending); if (hdev->reset_type != HNAE3_NONE_RESET) hclgevf_reset(hdev); } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED, @@ -2160,8 +2106,7 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) if (ret) return ret; - ret = hclge_comm_set_rss_input_tuple(&hdev->nic, &hdev->hw.hw, - false, rss_cfg); + ret = hclge_comm_set_rss_input_tuple(&hdev->hw.hw, rss_cfg); if (ret) return ret; } @@ -3262,72 +3207,6 @@ static void hclgevf_get_link_mode(struct hnae3_handle *handle, *advertising = hdev->hw.mac.advertising; } -#define MAX_SEPARATE_NUM 4 -#define SEPARATOR_VALUE 0xFDFCFBFA -#define REG_NUM_PER_LINE 4 -#define REG_LEN_PER_LINE (REG_NUM_PER_LINE * sizeof(u32)) - -static int hclgevf_get_regs_len(struct hnae3_handle *handle) -{ - int cmdq_lines, common_lines, ring_lines, tqp_intr_lines; - struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - - cmdq_lines = sizeof(cmdq_reg_addr_list) / REG_LEN_PER_LINE + 1; - common_lines = sizeof(common_reg_addr_list) / REG_LEN_PER_LINE + 1; - ring_lines = sizeof(ring_reg_addr_list) / REG_LEN_PER_LINE + 1; - tqp_intr_lines = sizeof(tqp_intr_reg_addr_list) / REG_LEN_PER_LINE + 1; - - return (cmdq_lines + common_lines + ring_lines * hdev->num_tqps + - tqp_intr_lines * (hdev->num_msi_used - 1)) * REG_LEN_PER_LINE; -} - -static void hclgevf_get_regs(struct hnae3_handle *handle, u32 *version, - void *data) -{ - struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - int i, j, reg_um, separator_num; - u32 *reg = data; - - *version = hdev->fw_version; - - /* fetching per-VF registers values from VF PCIe register space */ - reg_um = sizeof(cmdq_reg_addr_list) / sizeof(u32); - separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE; - for (i = 0; i < reg_um; i++) - *reg++ = hclgevf_read_dev(&hdev->hw, cmdq_reg_addr_list[i]); - for (i = 0; i < separator_num; i++) - *reg++ = SEPARATOR_VALUE; - - reg_um = sizeof(common_reg_addr_list) / sizeof(u32); - separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE; - for (i = 0; i < reg_um; i++) - *reg++ = hclgevf_read_dev(&hdev->hw, common_reg_addr_list[i]); - for (i = 0; i < separator_num; i++) - *reg++ = SEPARATOR_VALUE; - - reg_um = sizeof(ring_reg_addr_list) / sizeof(u32); - separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE; - for (j = 0; j < hdev->num_tqps; j++) { - for (i = 0; i < reg_um; i++) - *reg++ = hclgevf_read_dev(&hdev->hw, - ring_reg_addr_list[i] + - HCLGEVF_TQP_REG_SIZE * j); - for (i = 0; i < separator_num; i++) - *reg++ = SEPARATOR_VALUE; - } - - reg_um = sizeof(tqp_intr_reg_addr_list) / sizeof(u32); - separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE; - for (j = 0; j < hdev->num_msi_used - 1; j++) { - for (i = 0; i < reg_um; i++) - *reg++ = hclgevf_read_dev(&hdev->hw, - tqp_intr_reg_addr_list[i] + - 4 * j); - for (i = 0; i < separator_num; i++) - *reg++ = SEPARATOR_VALUE; - } -} - void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state, struct hclge_mbx_port_base_vlan *port_base_vlan) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 59ca6c794d6d..81c16b8c8da2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -294,4 +294,5 @@ void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev); void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev); void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state, struct hclge_mbx_port_base_vlan *port_base_vlan); +struct hclgevf_dev *hclgevf_ae_get_hdev(struct hnae3_handle *handle); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c new file mode 100644 index 000000000000..197ab733306b --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (c) 2023 Hisilicon Limited. + +#include "hclgevf_main.h" +#include "hclgevf_regs.h" +#include "hnae3.h" + +static const u32 cmdq_reg_addr_list[] = {HCLGE_COMM_NIC_CSQ_BASEADDR_L_REG, + HCLGE_COMM_NIC_CSQ_BASEADDR_H_REG, + HCLGE_COMM_NIC_CSQ_DEPTH_REG, + HCLGE_COMM_NIC_CSQ_TAIL_REG, + HCLGE_COMM_NIC_CSQ_HEAD_REG, + HCLGE_COMM_NIC_CRQ_BASEADDR_L_REG, + HCLGE_COMM_NIC_CRQ_BASEADDR_H_REG, + HCLGE_COMM_NIC_CRQ_DEPTH_REG, + HCLGE_COMM_NIC_CRQ_TAIL_REG, + HCLGE_COMM_NIC_CRQ_HEAD_REG, + HCLGE_COMM_VECTOR0_CMDQ_SRC_REG, + HCLGE_COMM_VECTOR0_CMDQ_STATE_REG, + HCLGE_COMM_CMDQ_INTR_EN_REG, + HCLGE_COMM_CMDQ_INTR_GEN_REG}; + +static const u32 common_reg_addr_list[] = {HCLGEVF_MISC_VECTOR_REG_BASE, + HCLGEVF_RST_ING, + HCLGEVF_GRO_EN_REG}; + +static const u32 ring_reg_addr_list[] = {HCLGEVF_RING_RX_ADDR_L_REG, + HCLGEVF_RING_RX_ADDR_H_REG, + HCLGEVF_RING_RX_BD_NUM_REG, + HCLGEVF_RING_RX_BD_LENGTH_REG, + HCLGEVF_RING_RX_MERGE_EN_REG, + HCLGEVF_RING_RX_TAIL_REG, + HCLGEVF_RING_RX_HEAD_REG, + HCLGEVF_RING_RX_FBD_NUM_REG, + HCLGEVF_RING_RX_OFFSET_REG, + HCLGEVF_RING_RX_FBD_OFFSET_REG, + HCLGEVF_RING_RX_STASH_REG, + HCLGEVF_RING_RX_BD_ERR_REG, + HCLGEVF_RING_TX_ADDR_L_REG, + HCLGEVF_RING_TX_ADDR_H_REG, + HCLGEVF_RING_TX_BD_NUM_REG, + HCLGEVF_RING_TX_PRIORITY_REG, + HCLGEVF_RING_TX_TC_REG, + HCLGEVF_RING_TX_MERGE_EN_REG, + HCLGEVF_RING_TX_TAIL_REG, + HCLGEVF_RING_TX_HEAD_REG, + HCLGEVF_RING_TX_FBD_NUM_REG, + HCLGEVF_RING_TX_OFFSET_REG, + HCLGEVF_RING_TX_EBD_NUM_REG, + HCLGEVF_RING_TX_EBD_OFFSET_REG, + HCLGEVF_RING_TX_BD_ERR_REG, + HCLGEVF_RING_EN_REG}; + +static const u32 tqp_intr_reg_addr_list[] = {HCLGEVF_TQP_INTR_CTRL_REG, + HCLGEVF_TQP_INTR_GL0_REG, + HCLGEVF_TQP_INTR_GL1_REG, + HCLGEVF_TQP_INTR_GL2_REG, + HCLGEVF_TQP_INTR_RL_REG}; + +#define MAX_SEPARATE_NUM 4 +#define SEPARATOR_VALUE 0xFDFCFBFA +#define REG_NUM_PER_LINE 4 +#define REG_LEN_PER_LINE (REG_NUM_PER_LINE * sizeof(u32)) + +int hclgevf_get_regs_len(struct hnae3_handle *handle) +{ + int cmdq_lines, common_lines, ring_lines, tqp_intr_lines; + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + + cmdq_lines = sizeof(cmdq_reg_addr_list) / REG_LEN_PER_LINE + 1; + common_lines = sizeof(common_reg_addr_list) / REG_LEN_PER_LINE + 1; + ring_lines = sizeof(ring_reg_addr_list) / REG_LEN_PER_LINE + 1; + tqp_intr_lines = sizeof(tqp_intr_reg_addr_list) / REG_LEN_PER_LINE + 1; + + return (cmdq_lines + common_lines + ring_lines * hdev->num_tqps + + tqp_intr_lines * (hdev->num_msi_used - 1)) * REG_LEN_PER_LINE; +} + +void hclgevf_get_regs(struct hnae3_handle *handle, u32 *version, + void *data) +{ +#define HCLGEVF_RING_REG_OFFSET 0x200 +#define HCLGEVF_RING_INT_REG_OFFSET 0x4 + + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + int i, j, reg_um, separator_num; + u32 *reg = data; + + *version = hdev->fw_version; + + /* fetching per-VF registers values from VF PCIe register space */ + reg_um = sizeof(cmdq_reg_addr_list) / sizeof(u32); + separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE; + for (i = 0; i < reg_um; i++) + *reg++ = hclgevf_read_dev(&hdev->hw, cmdq_reg_addr_list[i]); + for (i = 0; i < separator_num; i++) + *reg++ = SEPARATOR_VALUE; + + reg_um = sizeof(common_reg_addr_list) / sizeof(u32); + separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE; + for (i = 0; i < reg_um; i++) + *reg++ = hclgevf_read_dev(&hdev->hw, common_reg_addr_list[i]); + for (i = 0; i < separator_num; i++) + *reg++ = SEPARATOR_VALUE; + + reg_um = sizeof(ring_reg_addr_list) / sizeof(u32); + separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE; + for (j = 0; j < hdev->num_tqps; j++) { + for (i = 0; i < reg_um; i++) + *reg++ = hclgevf_read_dev(&hdev->hw, + ring_reg_addr_list[i] + + HCLGEVF_RING_REG_OFFSET * j); + for (i = 0; i < separator_num; i++) + *reg++ = SEPARATOR_VALUE; + } + + reg_um = sizeof(tqp_intr_reg_addr_list) / sizeof(u32); + separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE; + for (j = 0; j < hdev->num_msi_used - 1; j++) { + for (i = 0; i < reg_um; i++) + *reg++ = hclgevf_read_dev(&hdev->hw, + tqp_intr_reg_addr_list[i] + + HCLGEVF_RING_INT_REG_OFFSET * j); + for (i = 0; i < separator_num; i++) + *reg++ = SEPARATOR_VALUE; + } +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.h new file mode 100644 index 000000000000..77bdcf60a1af --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2023 Hisilicon Limited. */ + +#ifndef __HCLGEVF_REGS_H +#define __HCLGEVF_REGS_H +#include <linux/types.h> + +struct hnae3_handle; + +int hclgevf_get_regs_len(struct hnae3_handle *handle); +void hclgevf_get_regs(struct hnae3_handle *handle, u32 *version, + void *data); +#endif diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index 3ee89ae496d0..773d7aa29ef5 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* 82596.c: A generic 82596 ethernet driver for linux. */ /* Based on Apricot.c @@ -31,9 +32,7 @@ Driver skeleton Written 1993 by Donald Becker. Copyright 1993 United States Government as represented by the Director, - National Security Agency. This software may only be used and distributed - according to the terms of the GNU General Public License as modified by SRC, - incorporated herein by reference. + National Security Agency. The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403 diff --git a/drivers/net/ethernet/i825xx/lasi_82596.c b/drivers/net/ethernet/i825xx/lasi_82596.c index 0af70094aba3..3e53e0c243ba 100644 --- a/drivers/net/ethernet/i825xx/lasi_82596.c +++ b/drivers/net/ethernet/i825xx/lasi_82596.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* lasi_82596.c -- driver for the intel 82596 ethernet controller, as munged into HPPA boxen . @@ -59,9 +60,7 @@ Driver skeleton Written 1993 by Donald Becker. Copyright 1993 United States Government as represented by the Director, - National Security Agency. This software may only be used and distributed - according to the terms of the GNU General Public License as modified by SRC, - incorporated herein by reference. + National Security Agency. The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403 diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index ca2fb303fcc6..67d248a7a6f4 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* lasi_82596.c -- driver for the intel 82596 ethernet controller, as munged into HPPA boxen . @@ -59,9 +60,7 @@ Driver skeleton Written 1993 by Donald Becker. Copyright 1993 United States Government as represented by the Director, - National Security Agency. This software may only be used and distributed - according to the terms of the GNU General Public License as modified by SRC, - incorporated herein by reference. + National Security Agency. The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403 diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 3909c6a0af89..5e27470c6b1e 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Sun3 i82586 Ethernet driver * diff --git a/drivers/net/ethernet/i825xx/sun3_82586.h b/drivers/net/ethernet/i825xx/sun3_82586.h index d82eca563266..d8e249d704a7 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.h +++ b/drivers/net/ethernet/i825xx/sun3_82586.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Intel i82586 Ethernet definitions * diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 113fcb3e353e..832a2ae01950 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -203,7 +203,7 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length) unsigned long offset; for (offset = 0; offset < length; offset += SMP_CACHE_BYTES) - asm("dcbfl %0,%1" :: "b" (addr), "r" (offset)); + asm("dcbf %0,%1,1" :: "b" (addr), "r" (offset)); } /* replenish the buffers for a pool. note that we don't need to diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index c63d3ec9d328..df76cdaddcfb 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -97,6 +97,8 @@ static int pending_scrq(struct ibmvnic_adapter *, static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *, struct ibmvnic_sub_crq_queue *); static int ibmvnic_poll(struct napi_struct *napi, int data); +static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter); +static inline void reinit_init_done(struct ibmvnic_adapter *adapter); static void send_query_map(struct ibmvnic_adapter *adapter); static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8); static int send_request_unmap(struct ibmvnic_adapter *, u8); @@ -114,6 +116,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, static void free_long_term_buff(struct ibmvnic_adapter *adapter, struct ibmvnic_long_term_buff *ltb); static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter); +static void flush_reset_queue(struct ibmvnic_adapter *adapter); struct ibmvnic_stat { char name[ETH_GSTRING_LEN]; @@ -1505,8 +1508,8 @@ static const char *adapter_state_to_string(enum vnic_state state) static int ibmvnic_login(struct net_device *netdev) { + unsigned long flags, timeout = msecs_to_jiffies(20000); struct ibmvnic_adapter *adapter = netdev_priv(netdev); - unsigned long timeout = msecs_to_jiffies(20000); int retry_count = 0; int retries = 10; bool retry; @@ -1527,11 +1530,9 @@ static int ibmvnic_login(struct net_device *netdev) if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { - netdev_warn(netdev, "Login timed out, retrying...\n"); - retry = true; - adapter->init_done_rc = 0; - retry_count++; - continue; + netdev_warn(netdev, "Login timed out\n"); + adapter->login_pending = false; + goto partial_reset; } if (adapter->init_done_rc == ABORTED) { @@ -1573,10 +1574,69 @@ static int ibmvnic_login(struct net_device *netdev) "SCRQ irq initialization failed\n"); return rc; } + /* Default/timeout error handling, reset and start fresh */ } else if (adapter->init_done_rc) { netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n", adapter->init_done_rc); - return -EIO; + +partial_reset: + /* adapter login failed, so free any CRQs or sub-CRQs + * and register again before attempting to login again. + * If we don't do this then the VIOS may think that + * we are already logged in and reject any subsequent + * attempts + */ + netdev_warn(netdev, + "Freeing and re-registering CRQs before attempting to login again\n"); + retry = true; + adapter->init_done_rc = 0; + release_sub_crqs(adapter, true); + /* Much of this is similar logic as ibmvnic_probe(), + * we are essentially re-initializing communication + * with the server. We really should not run any + * resets/failovers here because this is already a form + * of reset and we do not want parallel resets occurring + */ + do { + reinit_init_done(adapter); + /* Clear any failovers we got in the previous + * pass since we are re-initializing the CRQ + */ + adapter->failover_pending = false; + release_crq_queue(adapter); + /* If we don't sleep here then we risk an + * unnecessary failover event from the VIOS. + * This is a known VIOS issue caused by a vnic + * device freeing and registering a CRQ too + * quickly. + */ + msleep(1500); + /* Avoid any resets, since we are currently + * resetting. + */ + spin_lock_irqsave(&adapter->rwi_lock, flags); + flush_reset_queue(adapter); + spin_unlock_irqrestore(&adapter->rwi_lock, + flags); + + rc = init_crq_queue(adapter); + if (rc) { + netdev_err(netdev, "login recovery: init CRQ failed %d\n", + rc); + return -EIO; + } + + rc = ibmvnic_reset_init(adapter, false); + if (rc) + netdev_err(netdev, "login recovery: Reset init failed %d\n", + rc); + /* IBMVNIC_CRQ_INIT will return EAGAIN if it + * fails, since ibmvnic_reset_init will free + * irq's in failure, we won't be able to receive + * new CRQs so we need to keep trying. probe() + * handles this similarly. + */ + } while (rc == -EAGAIN && retry_count++ < retries); } } while (retry); @@ -1588,12 +1648,22 @@ static int ibmvnic_login(struct net_device *netdev) static void release_login_buffer(struct ibmvnic_adapter *adapter) { + if (!adapter->login_buf) + return; + + dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token, + adapter->login_buf_sz, DMA_TO_DEVICE); kfree(adapter->login_buf); adapter->login_buf = NULL; } static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter) { + if (!adapter->login_rsp_buf) + return; + + dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token, + adapter->login_rsp_buf_sz, DMA_FROM_DEVICE); kfree(adapter->login_rsp_buf); adapter->login_rsp_buf = NULL; } @@ -1816,7 +1886,14 @@ static int __ibmvnic_open(struct net_device *netdev) if (prev_state == VNIC_CLOSED) enable_irq(adapter->tx_scrq[i]->irq); enable_scrq_irq(adapter, adapter->tx_scrq[i]); - netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); + /* netdev_tx_reset_queue will reset dql stats. During NON_FATAL + * resets, don't reset the stats because there could be batched + * skb's waiting to be sent. If we reset dql stats, we risk + * num_completed being greater than num_queued. This will cause + * a BUG_ON in dql_completed(). + */ + if (adapter->reset_reason != VNIC_RESET_NON_FATAL) + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); } rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); @@ -4823,11 +4900,14 @@ static int send_login(struct ibmvnic_adapter *adapter) if (rc) { adapter->login_pending = false; netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc); - goto buf_rsp_map_failed; + goto buf_send_failed; } return 0; +buf_send_failed: + dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size, + DMA_FROM_DEVICE); buf_rsp_map_failed: kfree(login_rsp_buffer); adapter->login_rsp_buf = NULL; @@ -5389,6 +5469,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, int num_tx_pools; int num_rx_pools; u64 *size_array; + u32 rsp_len; int i; /* CHECK: Test/set of login_pending does not need to be atomic @@ -5400,11 +5481,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, } adapter->login_pending = false; - dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz, - DMA_TO_DEVICE); - dma_unmap_single(dev, adapter->login_rsp_buf_token, - adapter->login_rsp_buf_sz, DMA_FROM_DEVICE); - /* If the number of queues requested can't be allocated by the * server, the login response will return with code 1. We will need * to resend the login buffer with fewer queues requested. @@ -5440,6 +5516,23 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, ibmvnic_reset(adapter, VNIC_RESET_FATAL); return -EIO; } + + rsp_len = be32_to_cpu(login_rsp->len); + if (be32_to_cpu(login->login_rsp_len) < rsp_len || + rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) || + rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) || + rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) || + rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) { + /* This can happen if a login request times out and there are + * 2 outstanding login requests sent, the LOGIN_RSP crq + * could have been for the older login request. So we are + * parsing the newer response buffer which may be incomplete + */ + dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n"); + ibmvnic_reset(adapter, VNIC_RESET_FATAL); + return -EIO; + } + size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); /* variable buffer sizes are not supported, so just read the diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index bd7ef59b1f2e..771a3c909c45 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -4198,7 +4198,7 @@ void e1000e_reset(struct e1000_adapter *adapter) /** * e1000e_trigger_lsc - trigger an LSC interrupt - * @adapter: + * @adapter: board private structure * * Fire a link status change interrupt to start the watchdog. **/ diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 9954493cd448..62497f5565c5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1839,7 +1839,7 @@ void i40e_dbg_pf_exit(struct i40e_pf *pf) void i40e_dbg_init(void) { i40e_dbg_root = debugfs_create_dir(i40e_driver_name, NULL); - if (!i40e_dbg_root) + if (IS_ERR(i40e_dbg_root)) pr_info("init of debugfs failed\n"); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b847bd105b16..a86bfa3bba74 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1788,12 +1788,6 @@ static int i40e_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) { - netdev_info(netdev, "already using mac address %pM\n", - addr->sa_data); - return 0; - } - if (test_bit(__I40E_DOWN, pf->state) || test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) return -EADDRNOTAVAIL; @@ -2615,7 +2609,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) retval = i40e_correct_mac_vlan_filters (vsi, &tmp_add_list, &tmp_del_list, vlan_filters); - else + else if (pf->vf) retval = i40e_correct_vf_mac_vlan_filters (vsi, &tmp_add_list, &tmp_del_list, vlan_filters, pf->vf[vsi->vf_id].trusted); @@ -2788,7 +2782,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } /* if the VF is not trusted do not do promisc */ - if ((vsi->type == I40E_VSI_SRIOV) && !pf->vf[vsi->vf_id].trusted) { + if (vsi->type == I40E_VSI_SRIOV && pf->vf && + !pf->vf[vsi->vf_id].trusted) { clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); goto out; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 9da0c87f0328..f99c1f7fec40 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -210,11 +210,11 @@ read_nvm_exit: * @hw: pointer to the HW structure. * @module_pointer: module pointer location in words from the NVM beginning * @offset: offset in words from module start - * @words: number of words to write - * @data: buffer with words to write to the Shadow RAM + * @words: number of words to read + * @data: buffer with words to read to the Shadow RAM * @last_command: tells the AdminQ that this is the last command * - * Writes a 16 bit words buffer to the Shadow RAM using the admin command. + * Reads a 16 bit words buffer to the Shadow RAM using the admin command. **/ static int i40e_read_nvm_aq(struct i40e_hw *hw, u8 module_pointer, u32 offset, @@ -234,18 +234,18 @@ static int i40e_read_nvm_aq(struct i40e_hw *hw, */ if ((offset + words) > hw->nvm.sr_size) i40e_debug(hw, I40E_DEBUG_NVM, - "NVM write error: offset %d beyond Shadow RAM limit %d\n", + "NVM read error: offset %d beyond Shadow RAM limit %d\n", (offset + words), hw->nvm.sr_size); else if (words > I40E_SR_SECTOR_SIZE_IN_WORDS) - /* We can write only up to 4KB (one sector), in one AQ write */ + /* We can read only up to 4KB (one sector), in one AQ write */ i40e_debug(hw, I40E_DEBUG_NVM, - "NVM write fail error: tried to write %d words, limit is %d.\n", + "NVM read fail error: tried to read %d words, limit is %d.\n", words, I40E_SR_SECTOR_SIZE_IN_WORDS); else if (((offset + (words - 1)) / I40E_SR_SECTOR_SIZE_IN_WORDS) != (offset / I40E_SR_SECTOR_SIZE_IN_WORDS)) - /* A single write cannot spread over two sectors */ + /* A single read cannot spread over two sectors */ i40e_debug(hw, I40E_DEBUG_NVM, - "NVM write error: cannot spread over two sectors in a single write offset=%d words=%d\n", + "NVM read error: cannot spread over two sectors in a single read offset=%d words=%d\n", offset, words); else ret_code = i40e_aq_read_nvm(hw, module_pointer, diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index cd7b52fb6b46..05ec1181471e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -582,7 +582,7 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring, * @vsi: Current VSI * @tx_ring: XDP Tx ring * - * Returns true if cleanup/tranmission is done. + * Returns true if cleanup/transmission is done. **/ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring) { diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 39d0fe76a38f..8cbdebc5b698 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -255,8 +255,10 @@ struct iavf_adapter { struct workqueue_struct *wq; struct work_struct reset_task; struct work_struct adminq_task; + struct work_struct finish_config; struct delayed_work client_task; wait_queue_head_t down_waitqueue; + wait_queue_head_t reset_waitqueue; wait_queue_head_t vc_waitqueue; struct iavf_q_vector *q_vectors; struct list_head vlan_filter_list; @@ -518,14 +520,12 @@ int iavf_up(struct iavf_adapter *adapter); void iavf_down(struct iavf_adapter *adapter); int iavf_process_config(struct iavf_adapter *adapter); int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter); -void iavf_schedule_reset(struct iavf_adapter *adapter); +void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags); void iavf_schedule_request_stats(struct iavf_adapter *adapter); +void iavf_schedule_finish_config(struct iavf_adapter *adapter); void iavf_reset(struct iavf_adapter *adapter); void iavf_set_ethtool_ops(struct net_device *netdev); void iavf_update_stats(struct iavf_adapter *adapter); -void iavf_reset_interrupt_capability(struct iavf_adapter *adapter); -int iavf_init_interrupt_scheme(struct iavf_adapter *adapter); -void iavf_irq_enable_queues(struct iavf_adapter *adapter); void iavf_free_all_tx_resources(struct iavf_adapter *adapter); void iavf_free_all_rx_resources(struct iavf_adapter *adapter); @@ -579,17 +579,11 @@ void iavf_enable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_disable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_enable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_disable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); -int iavf_replace_primary_mac(struct iavf_adapter *adapter, - const u8 *new_mac); -void -iavf_set_vlan_offload_features(struct iavf_adapter *adapter, - netdev_features_t prev_features, - netdev_features_t features); void iavf_add_fdir_filter(struct iavf_adapter *adapter); void iavf_del_fdir_filter(struct iavf_adapter *adapter); void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter); void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter); struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, const u8 *macaddr); -int iavf_lock_timeout(struct mutex *lock, unsigned int msecs); +int iavf_wait_for_reset(struct iavf_adapter *adapter); #endif /* _IAVF_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_alloc.h b/drivers/net/ethernet/intel/iavf/iavf_alloc.h index 2711573c14ec..162ea70685a6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_alloc.h +++ b/drivers/net/ethernet/intel/iavf/iavf_alloc.h @@ -28,7 +28,6 @@ enum iavf_status iavf_free_dma_mem(struct iavf_hw *hw, struct iavf_dma_mem *mem); enum iavf_status iavf_allocate_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem, u32 size); -enum iavf_status iavf_free_virt_mem(struct iavf_hw *hw, - struct iavf_virt_mem *mem); +void iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem); #endif /* _IAVF_ALLOC_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index dd11dbbd5551..1afd761d8052 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -35,7 +35,6 @@ enum iavf_status iavf_set_mac_type(struct iavf_hw *hw) status = IAVF_ERR_DEVICE_NOT_SUPPORTED; } - hw_dbg(hw, "found mac: %d, returns: %d\n", hw->mac.type, status); return status; } @@ -398,23 +397,6 @@ static enum iavf_status iavf_aq_get_set_rss_lut(struct iavf_hw *hw, } /** - * iavf_aq_get_rss_lut - * @hw: pointer to the hardware structure - * @vsi_id: vsi fw index - * @pf_lut: for PF table set true, for VSI table set false - * @lut: pointer to the lut buffer provided by the caller - * @lut_size: size of the lut buffer - * - * get the RSS lookup table, PF or VSI type - **/ -enum iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 vsi_id, - bool pf_lut, u8 *lut, u16 lut_size) -{ - return iavf_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size, - false); -} - -/** * iavf_aq_set_rss_lut * @hw: pointer to the hardware structure * @vsi_id: vsi fw index @@ -473,19 +455,6 @@ iavf_status iavf_aq_get_set_rss_key(struct iavf_hw *hw, u16 vsi_id, } /** - * iavf_aq_get_rss_key - * @hw: pointer to the hw struct - * @vsi_id: vsi fw index - * @key: pointer to key info struct - * - **/ -enum iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 vsi_id, - struct iavf_aqc_get_set_rss_key_data *key) -{ - return iavf_aq_get_set_rss_key(hw, vsi_id, key, false); -} - -/** * iavf_aq_set_rss_key * @hw: pointer to the hw struct * @vsi_id: vsi fw index @@ -828,17 +797,3 @@ void iavf_vf_parse_hw_config(struct iavf_hw *hw, vsi_res++; } } - -/** - * iavf_vf_reset - * @hw: pointer to the hardware structure - * - * Send a VF_RESET message to the PF. Does not wait for response from PF - * as none will be forthcoming. Immediately after calling this function, - * the admin queue should be shut down and (optionally) reinitialized. - **/ -enum iavf_status iavf_vf_reset(struct iavf_hw *hw) -{ - return iavf_aq_send_msg_to_pf(hw, VIRTCHNL_OP_RESET_VF, - 0, NULL, 0, NULL); -} diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 6f171d1d85b7..a34303ad057d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -484,6 +484,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) { struct iavf_adapter *adapter = netdev_priv(netdev); u32 orig_flags, new_flags, changed_flags; + int ret = 0; u32 i; orig_flags = READ_ONCE(adapter->flags); @@ -531,12 +532,14 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) /* issue a reset to force legacy-rx change to take effect */ if (changed_flags & IAVF_FLAG_LEGACY_RX) { if (netif_running(netdev)) { - adapter->flags |= IAVF_FLAG_RESET_NEEDED; - queue_work(adapter->wq, &adapter->reset_task); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + ret = iavf_wait_for_reset(adapter); + if (ret) + netdev_warn(netdev, "Changing private flags timeout or interrupted waiting for reset"); } } - return 0; + return ret; } /** @@ -627,6 +630,7 @@ static int iavf_set_ringparam(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); u32 new_rx_count, new_tx_count; + int ret = 0; if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; @@ -671,11 +675,13 @@ static int iavf_set_ringparam(struct net_device *netdev, } if (netif_running(netdev)) { - adapter->flags |= IAVF_FLAG_RESET_NEEDED; - queue_work(adapter->wq, &adapter->reset_task); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + ret = iavf_wait_for_reset(adapter); + if (ret) + netdev_warn(netdev, "Changing ring parameters timeout or interrupted waiting for reset"); } - return 0; + return ret; } /** @@ -1283,6 +1289,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.src_port = fsp->m_u.tcp_ip4_spec.psrc; fltr->ip_mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst; fltr->ip_mask.tos = fsp->m_u.tcp_ip4_spec.tos; + fltr->ip_ver = 4; break; case AH_V4_FLOW: case ESP_V4_FLOW: @@ -1294,6 +1301,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.ah_ip4_spec.ip4dst; fltr->ip_mask.spi = fsp->m_u.ah_ip4_spec.spi; fltr->ip_mask.tos = fsp->m_u.ah_ip4_spec.tos; + fltr->ip_ver = 4; break; case IPV4_USER_FLOW: fltr->ip_data.v4_addrs.src_ip = fsp->h_u.usr_ip4_spec.ip4src; @@ -1306,6 +1314,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes; fltr->ip_mask.tos = fsp->m_u.usr_ip4_spec.tos; fltr->ip_mask.proto = fsp->m_u.usr_ip4_spec.proto; + fltr->ip_ver = 4; break; case TCP_V6_FLOW: case UDP_V6_FLOW: @@ -1324,6 +1333,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.src_port = fsp->m_u.tcp_ip6_spec.psrc; fltr->ip_mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst; fltr->ip_mask.tclass = fsp->m_u.tcp_ip6_spec.tclass; + fltr->ip_ver = 6; break; case AH_V6_FLOW: case ESP_V6_FLOW: @@ -1339,6 +1349,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe sizeof(struct in6_addr)); fltr->ip_mask.spi = fsp->m_u.ah_ip6_spec.spi; fltr->ip_mask.tclass = fsp->m_u.ah_ip6_spec.tclass; + fltr->ip_ver = 6; break; case IPV6_USER_FLOW: memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src, @@ -1355,6 +1366,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes; fltr->ip_mask.tclass = fsp->m_u.usr_ip6_spec.tclass; fltr->ip_mask.proto = fsp->m_u.usr_ip6_spec.l4_proto; + fltr->ip_ver = 6; break; case ETHER_FLOW: fltr->eth_data.etype = fsp->h_u.ether_spec.h_proto; @@ -1365,6 +1377,10 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe return -EINVAL; } + err = iavf_validate_fdir_fltr_masks(adapter, fltr); + if (err) + return err; + if (iavf_fdir_is_dup_fltr(adapter, fltr)) return -EEXIST; @@ -1395,14 +1411,15 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx if (fsp->flow_type & FLOW_MAC_EXT) return -EINVAL; + spin_lock_bh(&adapter->fdir_fltr_lock); if (adapter->fdir_active_fltr >= IAVF_MAX_FDIR_FILTERS) { + spin_unlock_bh(&adapter->fdir_fltr_lock); dev_err(&adapter->pdev->dev, "Unable to add Flow Director filter because VF reached the limit of max allowed filters (%u)\n", IAVF_MAX_FDIR_FILTERS); return -ENOSPC; } - spin_lock_bh(&adapter->fdir_fltr_lock); if (iavf_find_fdir_fltr_by_loc(adapter, fsp->location)) { dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, it already exists\n"); spin_unlock_bh(&adapter->fdir_fltr_lock); @@ -1775,7 +1792,9 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, case ETHTOOL_GRXCLSRLCNT: if (!FDIR_FLTR_SUPPORT(adapter)) break; + spin_lock_bh(&adapter->fdir_fltr_lock); cmd->rule_cnt = adapter->fdir_active_fltr; + spin_unlock_bh(&adapter->fdir_fltr_lock); cmd->data = IAVF_MAX_FDIR_FILTERS; ret = 0; break; @@ -1830,7 +1849,7 @@ static int iavf_set_channels(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); u32 num_req = ch->combined_count; - int i; + int ret = 0; if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && adapter->num_tc) { @@ -1852,22 +1871,13 @@ static int iavf_set_channels(struct net_device *netdev, adapter->num_req_queues = num_req; adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED; - iavf_schedule_reset(adapter); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); - /* wait for the reset is done */ - for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { - msleep(IAVF_RESET_WAIT_MS); - if (adapter->flags & IAVF_FLAG_RESET_PENDING) - continue; - break; - } - if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { - adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; - adapter->num_active_queues = num_req; - return -EOPNOTSUPP; - } + ret = iavf_wait_for_reset(adapter); + if (ret) + netdev_warn(netdev, "Changing channel count timeout or interrupted waiting for reset"); - return 0; + return ret; } /** diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c index 6146203efd84..03e774bd2a5b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c +++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c @@ -18,6 +18,79 @@ static const struct in6_addr ipv6_addr_full_mask = { } }; +static const struct in6_addr ipv6_addr_zero_mask = { + .in6_u = { + .u6_addr8 = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + } + } +}; + +/** + * iavf_validate_fdir_fltr_masks - validate Flow Director filter fields masks + * @adapter: pointer to the VF adapter structure + * @fltr: Flow Director filter data structure + * + * Returns 0 if all masks of packet fields are either full or empty. Returns + * error on at least one partial mask. + */ +int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter, + struct iavf_fdir_fltr *fltr) +{ + if (fltr->eth_mask.etype && fltr->eth_mask.etype != htons(U16_MAX)) + goto partial_mask; + + if (fltr->ip_ver == 4) { + if (fltr->ip_mask.v4_addrs.src_ip && + fltr->ip_mask.v4_addrs.src_ip != htonl(U32_MAX)) + goto partial_mask; + + if (fltr->ip_mask.v4_addrs.dst_ip && + fltr->ip_mask.v4_addrs.dst_ip != htonl(U32_MAX)) + goto partial_mask; + + if (fltr->ip_mask.tos && fltr->ip_mask.tos != U8_MAX) + goto partial_mask; + } else if (fltr->ip_ver == 6) { + if (memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_zero_mask, + sizeof(struct in6_addr)) && + memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_full_mask, + sizeof(struct in6_addr))) + goto partial_mask; + + if (memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_zero_mask, + sizeof(struct in6_addr)) && + memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_full_mask, + sizeof(struct in6_addr))) + goto partial_mask; + + if (fltr->ip_mask.tclass && fltr->ip_mask.tclass != U8_MAX) + goto partial_mask; + } + + if (fltr->ip_mask.proto && fltr->ip_mask.proto != U8_MAX) + goto partial_mask; + + if (fltr->ip_mask.src_port && fltr->ip_mask.src_port != htons(U16_MAX)) + goto partial_mask; + + if (fltr->ip_mask.dst_port && fltr->ip_mask.dst_port != htons(U16_MAX)) + goto partial_mask; + + if (fltr->ip_mask.spi && fltr->ip_mask.spi != htonl(U32_MAX)) + goto partial_mask; + + if (fltr->ip_mask.l4_header && + fltr->ip_mask.l4_header != htonl(U32_MAX)) + goto partial_mask; + + return 0; + +partial_mask: + dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, partial masks are not supported\n"); + return -EOPNOTSUPP; +} + /** * iavf_pkt_udp_no_pay_len - the length of UDP packet without payload * @fltr: Flow Director filter data structure @@ -263,8 +336,6 @@ iavf_fill_fdir_ip4_hdr(struct iavf_fdir_fltr *fltr, VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST); } - fltr->ip_ver = 4; - return 0; } @@ -309,8 +380,6 @@ iavf_fill_fdir_ip6_hdr(struct iavf_fdir_fltr *fltr, VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST); } - fltr->ip_ver = 6; - return 0; } @@ -722,7 +791,9 @@ void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *f bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr) { struct iavf_fdir_fltr *tmp; + bool ret = false; + spin_lock_bh(&adapter->fdir_fltr_lock); list_for_each_entry(tmp, &adapter->fdir_list_head, list) { if (tmp->flow_type != fltr->flow_type) continue; @@ -732,11 +803,14 @@ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr * !memcmp(&tmp->ip_data, &fltr->ip_data, sizeof(fltr->ip_data)) && !memcmp(&tmp->ext_data, &fltr->ext_data, - sizeof(fltr->ext_data))) - return true; + sizeof(fltr->ext_data))) { + ret = true; + break; + } } + spin_unlock_bh(&adapter->fdir_fltr_lock); - return false; + return ret; } /** diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h index 33c55c366315..9eb9f73f6adf 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h +++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h @@ -110,6 +110,8 @@ struct iavf_fdir_fltr { struct virtchnl_fdir_add vc_add_msg; }; +int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter, + struct iavf_fdir_fltr *fltr); int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 4a66873882d1..9610ca770349 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -167,6 +167,45 @@ static struct iavf_adapter *iavf_pdev_to_adapter(struct pci_dev *pdev) } /** + * iavf_is_reset_in_progress - Check if a reset is in progress + * @adapter: board private structure + */ +static bool iavf_is_reset_in_progress(struct iavf_adapter *adapter) +{ + if (adapter->state == __IAVF_RESETTING || + adapter->flags & (IAVF_FLAG_RESET_PENDING | + IAVF_FLAG_RESET_NEEDED)) + return true; + + return false; +} + +/** + * iavf_wait_for_reset - Wait for reset to finish. + * @adapter: board private structure + * + * Returns 0 if reset finished successfully, negative on timeout or interrupt. + */ +int iavf_wait_for_reset(struct iavf_adapter *adapter) +{ + int ret = wait_event_interruptible_timeout(adapter->reset_waitqueue, + !iavf_is_reset_in_progress(adapter), + msecs_to_jiffies(5000)); + + /* If ret < 0 then it means wait was interrupted. + * If ret == 0 then it means we got a timeout while waiting + * for reset to finish. + * If ret > 0 it means reset has finished. + */ + if (ret > 0) + return 0; + else if (ret < 0) + return -EINTR; + else + return -EBUSY; +} + +/** * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to fill out @@ -192,12 +231,11 @@ enum iavf_status iavf_allocate_dma_mem_d(struct iavf_hw *hw, } /** - * iavf_free_dma_mem_d - OS specific memory free for shared code + * iavf_free_dma_mem - wrapper for DMA memory freeing * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ -enum iavf_status iavf_free_dma_mem_d(struct iavf_hw *hw, - struct iavf_dma_mem *mem) +enum iavf_status iavf_free_dma_mem(struct iavf_hw *hw, struct iavf_dma_mem *mem) { struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back; @@ -209,13 +247,13 @@ enum iavf_status iavf_free_dma_mem_d(struct iavf_hw *hw, } /** - * iavf_allocate_virt_mem_d - OS specific memory alloc for shared code + * iavf_allocate_virt_mem - virt memory alloc wrapper * @hw: pointer to the HW structure * @mem: ptr to mem struct to fill out * @size: size of memory requested **/ -enum iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, - struct iavf_virt_mem *mem, u32 size) +enum iavf_status iavf_allocate_virt_mem(struct iavf_hw *hw, + struct iavf_virt_mem *mem, u32 size) { if (!mem) return IAVF_ERR_PARAM; @@ -230,20 +268,13 @@ enum iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, } /** - * iavf_free_virt_mem_d - OS specific memory free for shared code + * iavf_free_virt_mem - virt memory free wrapper * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ -enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, - struct iavf_virt_mem *mem) +void iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem) { - if (!mem) - return IAVF_ERR_PARAM; - - /* it's ok to kfree a NULL pointer */ kfree(mem->va); - - return 0; } /** @@ -253,7 +284,7 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, * * Returns 0 on success, negative on failure **/ -int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) +static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) { unsigned int wait, delay = 10; @@ -270,12 +301,14 @@ int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) /** * iavf_schedule_reset - Set the flags and schedule a reset event * @adapter: board private structure + * @flags: IAVF_FLAG_RESET_PENDING or IAVF_FLAG_RESET_NEEDED **/ -void iavf_schedule_reset(struct iavf_adapter *adapter) +void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags) { - if (!(adapter->flags & - (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) { - adapter->flags |= IAVF_FLAG_RESET_NEEDED; + if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) && + !(adapter->flags & + (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) { + adapter->flags |= flags; queue_work(adapter->wq, &adapter->reset_task); } } @@ -303,7 +336,7 @@ static void iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue) struct iavf_adapter *adapter = netdev_priv(netdev); adapter->tx_timeout_count++; - iavf_schedule_reset(adapter); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); } /** @@ -362,7 +395,7 @@ static void iavf_irq_disable(struct iavf_adapter *adapter) * iavf_irq_enable_queues - Enable interrupt for all queues * @adapter: board private structure **/ -void iavf_irq_enable_queues(struct iavf_adapter *adapter) +static void iavf_irq_enable_queues(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; int i; @@ -1003,44 +1036,40 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, * * Do not call this with mac_vlan_list_lock! **/ -int iavf_replace_primary_mac(struct iavf_adapter *adapter, - const u8 *new_mac) +static int iavf_replace_primary_mac(struct iavf_adapter *adapter, + const u8 *new_mac) { struct iavf_hw *hw = &adapter->hw; - struct iavf_mac_filter *f; + struct iavf_mac_filter *new_f; + struct iavf_mac_filter *old_f; spin_lock_bh(&adapter->mac_vlan_list_lock); - list_for_each_entry(f, &adapter->mac_filter_list, list) { - f->is_primary = false; + new_f = iavf_add_filter(adapter, new_mac); + if (!new_f) { + spin_unlock_bh(&adapter->mac_vlan_list_lock); + return -ENOMEM; } - f = iavf_find_filter(adapter, hw->mac.addr); - if (f) { - f->remove = true; + old_f = iavf_find_filter(adapter, hw->mac.addr); + if (old_f) { + old_f->is_primary = false; + old_f->remove = true; adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER; } - - f = iavf_add_filter(adapter, new_mac); - - if (f) { - /* Always send the request to add if changing primary MAC - * even if filter is already present on the list - */ - f->is_primary = true; - f->add = true; - adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; - ether_addr_copy(hw->mac.addr, new_mac); - } + /* Always send the request to add if changing primary MAC, + * even if filter is already present on the list + */ + new_f->is_primary = true; + new_f->add = true; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; + ether_addr_copy(hw->mac.addr, new_mac); spin_unlock_bh(&adapter->mac_vlan_list_lock); /* schedule the watchdog task to immediately process the request */ - if (f) { - mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); - return 0; - } - return -ENOMEM; + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); + return 0; } /** @@ -1663,10 +1692,10 @@ static int iavf_set_interrupt_capability(struct iavf_adapter *adapter) adapter->msix_entries[vector].entry = vector; err = iavf_acquire_msix_vectors(adapter, v_budget); + if (!err) + iavf_schedule_finish_config(adapter); out: - netif_set_real_num_rx_queues(adapter->netdev, pairs); - netif_set_real_num_tx_queues(adapter->netdev, pairs); return err; } @@ -1840,19 +1869,16 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter) static void iavf_free_q_vectors(struct iavf_adapter *adapter) { int q_idx, num_q_vectors; - int napi_vectors; if (!adapter->q_vectors) return; num_q_vectors = adapter->num_msix_vectors - NONQ_VECS; - napi_vectors = adapter->num_active_queues; for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx]; - if (q_idx < napi_vectors) - netif_napi_del(&q_vector->napi); + netif_napi_del(&q_vector->napi); } kfree(adapter->q_vectors); adapter->q_vectors = NULL; @@ -1863,7 +1889,7 @@ static void iavf_free_q_vectors(struct iavf_adapter *adapter) * @adapter: board private structure * **/ -void iavf_reset_interrupt_capability(struct iavf_adapter *adapter) +static void iavf_reset_interrupt_capability(struct iavf_adapter *adapter) { if (!adapter->msix_entries) return; @@ -1878,7 +1904,7 @@ void iavf_reset_interrupt_capability(struct iavf_adapter *adapter) * @adapter: board private structure to initialize * **/ -int iavf_init_interrupt_scheme(struct iavf_adapter *adapter) +static int iavf_init_interrupt_scheme(struct iavf_adapter *adapter) { int err; @@ -1889,9 +1915,7 @@ int iavf_init_interrupt_scheme(struct iavf_adapter *adapter) goto err_alloc_queues; } - rtnl_lock(); err = iavf_set_interrupt_capability(adapter); - rtnl_unlock(); if (err) { dev_err(&adapter->pdev->dev, "Unable to setup interrupt capabilities\n"); @@ -1944,15 +1968,16 @@ static void iavf_free_rss(struct iavf_adapter *adapter) /** * iavf_reinit_interrupt_scheme - Reallocate queues and vectors * @adapter: board private structure + * @running: true if adapter->state == __IAVF_RUNNING * * Returns 0 on success, negative on failure **/ -static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter) +static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool running) { struct net_device *netdev = adapter->netdev; int err; - if (netif_running(netdev)) + if (running) iavf_free_traffic_irqs(adapter); iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); @@ -1977,6 +2002,78 @@ err: } /** + * iavf_finish_config - do all netdev work that needs RTNL + * @work: our work_struct + * + * Do work that needs both RTNL and crit_lock. + **/ +static void iavf_finish_config(struct work_struct *work) +{ + struct iavf_adapter *adapter; + int pairs, err; + + adapter = container_of(work, struct iavf_adapter, finish_config); + + /* Always take RTNL first to prevent circular lock dependency */ + rtnl_lock(); + mutex_lock(&adapter->crit_lock); + + if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) && + adapter->netdev_registered && + !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) { + netdev_update_features(adapter->netdev); + adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES; + } + + switch (adapter->state) { + case __IAVF_DOWN: + if (!adapter->netdev_registered) { + err = register_netdevice(adapter->netdev); + if (err) { + dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n", + err); + + /* go back and try again.*/ + iavf_free_rss(adapter); + iavf_free_misc_irq(adapter); + iavf_reset_interrupt_capability(adapter); + iavf_change_state(adapter, + __IAVF_INIT_CONFIG_ADAPTER); + goto out; + } + adapter->netdev_registered = true; + } + + /* Set the real number of queues when reset occurs while + * state == __IAVF_DOWN + */ + fallthrough; + case __IAVF_RUNNING: + pairs = adapter->num_active_queues; + netif_set_real_num_rx_queues(adapter->netdev, pairs); + netif_set_real_num_tx_queues(adapter->netdev, pairs); + break; + + default: + break; + } + +out: + mutex_unlock(&adapter->crit_lock); + rtnl_unlock(); +} + +/** + * iavf_schedule_finish_config - Set the flags and schedule a reset event + * @adapter: board private structure + **/ +void iavf_schedule_finish_config(struct iavf_adapter *adapter) +{ + if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) + queue_work(adapter->wq, &adapter->finish_config); +} + +/** * iavf_process_aq_command - process aq_required flags * and sends aq command * @adapter: pointer to iavf adapter structure @@ -2176,7 +2273,7 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) * the watchdog if any changes are requested to expedite the request via * virtchnl. **/ -void +static void iavf_set_vlan_offload_features(struct iavf_adapter *adapter, netdev_features_t prev_features, netdev_features_t features) @@ -2383,7 +2480,7 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter) adapter->vsi_res->num_queue_pairs); adapter->flags |= IAVF_FLAG_REINIT_MSIX_NEEDED; adapter->num_req_queues = adapter->vsi_res->num_queue_pairs; - iavf_schedule_reset(adapter); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); return -EAGAIN; } @@ -2613,22 +2710,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) netif_carrier_off(netdev); adapter->link_up = false; - - /* set the semaphore to prevent any callbacks after device registration - * up to time when state of driver will be set to __IAVF_DOWN - */ - rtnl_lock(); - if (!adapter->netdev_registered) { - err = register_netdevice(netdev); - if (err) { - rtnl_unlock(); - goto err_register; - } - } - - adapter->netdev_registered = true; - netif_tx_stop_all_queues(netdev); + if (CLIENT_ALLOWED(adapter)) { err = iavf_lan_add_device(adapter); if (err) @@ -2641,7 +2724,6 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) iavf_change_state(adapter, __IAVF_DOWN); set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); - rtnl_unlock(); iavf_misc_irq_enable(adapter); wake_up(&adapter->down_waitqueue); @@ -2661,10 +2743,11 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) /* request initial VLAN offload settings */ iavf_set_vlan_offload_features(adapter, 0, netdev->features); + iavf_schedule_finish_config(adapter); return; + err_mem: iavf_free_rss(adapter); -err_register: iavf_free_misc_irq(adapter); err_sw_init: iavf_reset_interrupt_capability(adapter); @@ -2691,26 +2774,9 @@ static void iavf_watchdog_task(struct work_struct *work) goto restart_watchdog; } - if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) && - adapter->netdev_registered && - !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) && - rtnl_trylock()) { - netdev_update_features(adapter->netdev); - rtnl_unlock(); - adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES; - } - if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) iavf_change_state(adapter, __IAVF_COMM_FAILED); - if (adapter->flags & IAVF_FLAG_RESET_NEEDED) { - adapter->aq_required = 0; - adapter->current_op = VIRTCHNL_OP_UNKNOWN; - mutex_unlock(&adapter->crit_lock); - queue_work(adapter->wq, &adapter->reset_task); - return; - } - switch (adapter->state) { case __IAVF_STARTUP: iavf_startup(adapter); @@ -2838,11 +2904,10 @@ static void iavf_watchdog_task(struct work_struct *work) /* check for hw reset */ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; if (!reg_val) { - adapter->flags |= IAVF_FLAG_RESET_PENDING; adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); - queue_work(adapter->wq, &adapter->reset_task); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING); mutex_unlock(&adapter->crit_lock); queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ * 2); @@ -2952,11 +3017,6 @@ static void iavf_reset_task(struct work_struct *work) int i = 0, err; bool running; - /* Detach interface to avoid subsequent NDO callbacks */ - rtnl_lock(); - netif_device_detach(netdev); - rtnl_unlock(); - /* When device is being removed it doesn't make sense to run the reset * task, just return in such a case. */ @@ -2964,7 +3024,7 @@ static void iavf_reset_task(struct work_struct *work) if (adapter->state != __IAVF_REMOVE) queue_work(adapter->wq, &adapter->reset_task); - goto reset_finish; + return; } while (!mutex_trylock(&adapter->client_lock)) @@ -3022,11 +3082,6 @@ static void iavf_reset_task(struct work_struct *work) iavf_disable_vf(adapter); mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); - if (netif_running(netdev)) { - rtnl_lock(); - dev_close(netdev); - rtnl_unlock(); - } return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -3068,7 +3123,7 @@ continue_reset: if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) || (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED)) { - err = iavf_reinit_interrupt_scheme(adapter); + err = iavf_reinit_interrupt_scheme(adapter, running); if (err) goto reset_err; } @@ -3163,10 +3218,11 @@ continue_reset: adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; + wake_up(&adapter->reset_waitqueue); mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); - goto reset_finish; + return; reset_err: if (running) { set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); @@ -3176,21 +3232,7 @@ reset_err: mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); - - if (netif_running(netdev)) { - /* Close device to ensure that Tx queues will not be started - * during netif_device_attach() at the end of the reset task. - */ - rtnl_lock(); - dev_close(netdev); - rtnl_unlock(); - } - dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); -reset_finish: - rtnl_lock(); - netif_device_attach(netdev); - rtnl_unlock(); } /** @@ -3208,9 +3250,6 @@ static void iavf_adminq_task(struct work_struct *work) u32 val, oldval; u16 pending; - if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) - goto out; - if (!mutex_trylock(&adapter->crit_lock)) { if (adapter->state == __IAVF_REMOVE) return; @@ -3219,10 +3258,13 @@ static void iavf_adminq_task(struct work_struct *work) goto out; } + if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) + goto unlock; + event.buf_len = IAVF_MAX_AQ_BUF_SIZE; event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); if (!event.msg_buf) - goto out; + goto unlock; do { ret = iavf_clean_arq_element(hw, &event, &pending); @@ -3237,11 +3279,8 @@ static void iavf_adminq_task(struct work_struct *work) if (pending != 0) memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE); } while (pending); - mutex_unlock(&adapter->crit_lock); - if ((adapter->flags & - (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) || - adapter->state == __IAVF_RESETTING) + if (iavf_is_reset_in_progress(adapter)) goto freedom; /* check for error indications */ @@ -3283,6 +3322,8 @@ static void iavf_adminq_task(struct work_struct *work) freedom: kfree(event.msg_buf); +unlock: + mutex_unlock(&adapter->crit_lock); out: /* re-enable Admin queue interrupt cause */ iavf_misc_irq_enable(adapter); @@ -4327,6 +4368,7 @@ static int iavf_close(struct net_device *netdev) static int iavf_change_mtu(struct net_device *netdev, int new_mtu) { struct iavf_adapter *adapter = netdev_priv(netdev); + int ret = 0; netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); @@ -4337,11 +4379,15 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) } if (netif_running(netdev)) { - adapter->flags |= IAVF_FLAG_RESET_NEEDED; - queue_work(adapter->wq, &adapter->reset_task); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + ret = iavf_wait_for_reset(adapter); + if (ret < 0) + netdev_warn(netdev, "MTU change interrupted waiting for reset"); + else if (ret) + netdev_warn(netdev, "MTU change timed out waiting for reset"); } - return 0; + return ret; } #define NETIF_VLAN_OFFLOAD_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \ @@ -4934,6 +4980,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&adapter->reset_task, iavf_reset_task); INIT_WORK(&adapter->adminq_task, iavf_adminq_task); + INIT_WORK(&adapter->finish_config, iavf_finish_config); INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task); INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task); queue_delayed_work(adapter->wq, &adapter->watchdog_task, @@ -4942,6 +4989,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup the wait queue for indicating transition to down status */ init_waitqueue_head(&adapter->down_waitqueue); + /* Setup the wait queue for indicating transition to running state */ + init_waitqueue_head(&adapter->reset_waitqueue); + /* Setup the wait queue for indicating virtchannel events */ init_waitqueue_head(&adapter->vc_waitqueue); @@ -5073,13 +5123,15 @@ static void iavf_remove(struct pci_dev *pdev) usleep_range(500, 1000); } cancel_delayed_work_sync(&adapter->watchdog_task); + cancel_work_sync(&adapter->finish_config); + rtnl_lock(); if (adapter->netdev_registered) { - rtnl_lock(); unregister_netdevice(netdev); adapter->netdev_registered = false; - rtnl_unlock(); } + rtnl_unlock(); + if (CLIENT_ALLOWED(adapter)) { err = iavf_lan_del_device(adapter); if (err) diff --git a/drivers/net/ethernet/intel/iavf/iavf_osdep.h b/drivers/net/ethernet/intel/iavf/iavf_osdep.h index a452ce90679a..77d33deaabb5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_osdep.h +++ b/drivers/net/ethernet/intel/iavf/iavf_osdep.h @@ -13,12 +13,6 @@ /* get readq/writeq support for 32 bit kernels, use the low-first version */ #include <linux/io-64-nonatomic-lo-hi.h> -/* File to be the magic between shared code and - * actual OS primitives - */ - -#define hw_dbg(hw, S, A...) do {} while (0) - #define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) #define rd32(a, reg) readl((a)->hw_addr + (reg)) @@ -35,14 +29,11 @@ struct iavf_dma_mem { #define iavf_allocate_dma_mem(h, m, unused, s, a) \ iavf_allocate_dma_mem_d(h, m, s, a) -#define iavf_free_dma_mem(h, m) iavf_free_dma_mem_d(h, m) struct iavf_virt_mem { void *va; u32 size; }; -#define iavf_allocate_virt_mem(h, m, s) iavf_allocate_virt_mem_d(h, m, s) -#define iavf_free_virt_mem(h, m) iavf_free_virt_mem_d(h, m) #define iavf_debug(h, m, s, ...) \ do { \ diff --git a/drivers/net/ethernet/intel/iavf/iavf_prototype.h b/drivers/net/ethernet/intel/iavf/iavf_prototype.h index edebfbbcffdc..940cb4203fbe 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_prototype.h +++ b/drivers/net/ethernet/intel/iavf/iavf_prototype.h @@ -40,12 +40,8 @@ enum iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading); const char *iavf_aq_str(struct iavf_hw *hw, enum iavf_admin_queue_err aq_err); const char *iavf_stat_str(struct iavf_hw *hw, enum iavf_status stat_err); -enum iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 seid, - bool pf_lut, u8 *lut, u16 lut_size); enum iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 seid, bool pf_lut, u8 *lut, u16 lut_size); -enum iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 seid, - struct iavf_aqc_get_set_rss_key_data *key); enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 seid, struct iavf_aqc_get_set_rss_key_data *key); @@ -60,7 +56,6 @@ static inline struct iavf_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype) void iavf_vf_parse_hw_config(struct iavf_hw *hw, struct virtchnl_vf_resource *msg); -enum iavf_status iavf_vf_reset(struct iavf_hw *hw); enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw, enum virtchnl_ops v_opcode, enum iavf_status v_retval, diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index e989feda133c..8c5f6096b002 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -54,7 +54,7 @@ static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring, * iavf_clean_tx_ring - Free any empty Tx buffers * @tx_ring: ring to be cleaned **/ -void iavf_clean_tx_ring(struct iavf_ring *tx_ring) +static void iavf_clean_tx_ring(struct iavf_ring *tx_ring) { unsigned long bi_size; u16 i; @@ -110,7 +110,7 @@ void iavf_free_tx_resources(struct iavf_ring *tx_ring) * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw) +static u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw) { u32 head, tail; @@ -128,6 +128,24 @@ u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw) } /** + * iavf_force_wb - Issue SW Interrupt so HW does a wb + * @vsi: the VSI we care about + * @q_vector: the vector on which to force writeback + **/ +static void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector) +{ + u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK | + IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ + IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | + IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK + /* allow 00 to be written to the index */; + + wr32(&vsi->back->hw, + IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), + val); +} + +/** * iavf_detect_recover_hung - Function to detect and recover hung_queues * @vsi: pointer to vsi struct with tx queues * @@ -352,25 +370,6 @@ static void iavf_enable_wb_on_itr(struct iavf_vsi *vsi, q_vector->arm_wb_state = true; } -/** - * iavf_force_wb - Issue SW Interrupt so HW does a wb - * @vsi: the VSI we care about - * @q_vector: the vector on which to force writeback - * - **/ -void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector) -{ - u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK | - IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ - IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | - IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK - /* allow 00 to be written to the index */; - - wr32(&vsi->back->hw, - IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), - val); -} - static inline bool iavf_container_is_rx(struct iavf_q_vector *q_vector, struct iavf_ring_container *rc) { @@ -687,7 +686,7 @@ err: * iavf_clean_rx_ring - Free Rx buffers * @rx_ring: ring to be cleaned **/ -void iavf_clean_rx_ring(struct iavf_ring *rx_ring) +static void iavf_clean_rx_ring(struct iavf_ring *rx_ring) { unsigned long bi_size; u16 i; diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index 2624bf6d009e..7e6ee32d19b6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -442,15 +442,11 @@ static inline unsigned int iavf_rx_pg_order(struct iavf_ring *ring) bool iavf_alloc_rx_buffers(struct iavf_ring *rxr, u16 cleaned_count); netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); -void iavf_clean_tx_ring(struct iavf_ring *tx_ring); -void iavf_clean_rx_ring(struct iavf_ring *rx_ring); int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring); int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring); void iavf_free_tx_resources(struct iavf_ring *tx_ring); void iavf_free_rx_resources(struct iavf_ring *rx_ring); int iavf_napi_poll(struct napi_struct *napi, int budget); -void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector); -u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw); void iavf_detect_recover_hung(struct iavf_vsi *vsi); int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size); bool __iavf_chk_linearize(struct sk_buff *skb); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 7c0578b5457b..be3c007ce90a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -1961,9 +1961,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, case VIRTCHNL_EVENT_RESET_IMPENDING: dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n"); if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) { - adapter->flags |= IAVF_FLAG_RESET_PENDING; dev_info(&adapter->pdev->dev, "Scheduling reset task\n"); - queue_work(adapter->wq, &adapter->reset_task); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING); } break; default: @@ -2237,6 +2236,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, iavf_process_config(adapter); adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES; + iavf_schedule_finish_config(adapter); iavf_set_queue_vlan_tag_loc(adapter); @@ -2285,6 +2285,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, case VIRTCHNL_OP_ENABLE_QUEUES: /* enable transmits */ iavf_irq_enable(adapter, true); + wake_up(&adapter->reset_waitqueue); adapter->flags &= ~IAVF_FLAG_QUEUES_DISABLED; break; case VIRTCHNL_OP_DISABLE_QUEUES: diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 5d89392f969b..817977e3039d 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -18,6 +18,7 @@ ice-y := ice_main.o \ ice_txrx_lib.o \ ice_txrx.o \ ice_fltr.o \ + ice_irq.o \ ice_pf_vsi_vlan_ops.o \ ice_vsi_vlan_ops.o \ ice_vsi_vlan_lib.o \ diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index aa32111afd6e..4ba3d99439a0 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -32,6 +32,7 @@ #include <linux/pkt_sched.h> #include <linux/if_bridge.h> #include <linux/ctype.h> +#include <linux/linkmode.h> #include <linux/bpf.h> #include <linux/btf.h> #include <linux/auxiliary_bus.h> @@ -74,6 +75,7 @@ #include "ice_lag.h" #include "ice_vsi_vlan_ops.h" #include "ice_gnss.h" +#include "ice_irq.h" #define ICE_BAR0 0 #define ICE_REQ_DESC_MULTIPLE 32 @@ -103,11 +105,6 @@ #define ICE_Q_WAIT_RETRY_LIMIT 10 #define ICE_Q_WAIT_MAX_RETRY (5 * ICE_Q_WAIT_RETRY_LIMIT) #define ICE_MAX_LG_RSS_QS 256 -#define ICE_RES_VALID_BIT 0x8000 -#define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1) -#define ICE_RES_RDMA_VEC_ID (ICE_RES_MISC_VEC_ID - 1) -/* All VF control VSIs share the same IRQ, so assign a unique ID for them */ -#define ICE_RES_VF_CTRL_VEC_ID (ICE_RES_RDMA_VEC_ID - 1) #define ICE_INVAL_Q_INDEX 0xffff #define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */ @@ -245,12 +242,6 @@ struct ice_tc_cfg { struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS]; }; -struct ice_res_tracker { - u16 num_entries; - u16 end; - u16 list[]; -}; - struct ice_qs_cfg { struct mutex *qs_mutex; /* will be assigned to &pf->avail_q_mutex */ unsigned long *pf_map; @@ -348,7 +339,9 @@ struct ice_vsi { u32 rx_buf_failed; u32 rx_page_failed; u16 num_q_vectors; - u16 base_vector; /* IRQ base for OS reserved vectors */ + /* tell if only dynamic irq allocation is allowed */ + bool irq_dyn_alloc; + enum ice_vsi_type type; u16 vsi_num; /* HW (absolute) index of this VSI */ u16 idx; /* software index in pf->vsi[] */ @@ -479,6 +472,7 @@ struct ice_q_vector { char name[ICE_INT_NAME_STR_LEN]; u16 total_events; /* net_dim(): number of interrupts processed */ + struct msi_map irq; } ____cacheline_internodealigned_in_smp; enum ice_pf_flags { @@ -514,6 +508,12 @@ enum ice_pf_flags { ICE_PF_FLAGS_NBITS /* must be last */ }; +enum ice_misc_thread_tasks { + ICE_MISC_THREAD_EXTTS_EVENT, + ICE_MISC_THREAD_TX_TSTAMP, + ICE_MISC_THREAD_NBITS /* must be last */ +}; + struct ice_switchdev_info { struct ice_vsi *control_vsi; struct ice_vsi *uplink_vsi; @@ -539,7 +539,7 @@ struct ice_pf { /* OS reserved IRQ details */ struct msix_entry *msix_entries; - struct ice_res_tracker *irq_tracker; + struct ice_irq_tracker irq_tracker; /* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the * number of MSIX vectors needed for all SR-IOV VFs from the number of * MSIX vectors allowed on this PF. @@ -556,6 +556,7 @@ struct ice_pf { DECLARE_BITMAP(features, ICE_F_MAX); DECLARE_BITMAP(state, ICE_STATE_NBITS); DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS); + DECLARE_BITMAP(misc_thread, ICE_MISC_THREAD_NBITS); unsigned long *avail_txqs; /* bitmap to track PF Tx queue usage */ unsigned long *avail_rxqs; /* bitmap to track PF Rx queue usage */ unsigned long serv_tmr_period; @@ -583,8 +584,7 @@ struct ice_pf { u32 hw_csum_rx_error; u32 oicr_err_reg; - u16 oicr_idx; /* Other interrupt cause MSIX vector index */ - u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */ + struct msi_map oicr_irq; /* Other interrupt cause MSIX vector */ u16 max_pf_txqs; /* Total Tx queues PF wide */ u16 max_pf_rxqs; /* Total Rx queues PF wide */ u16 num_lan_msix; /* Total MSIX vectors for base driver */ @@ -670,7 +670,7 @@ ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, struct ice_q_vector *q_vector) { u32 vector = (vsi && q_vector) ? q_vector->reg_idx : - ((struct ice_pf *)hw->back)->oicr_idx; + ((struct ice_pf *)hw->back)->oicr_irq.index; int itr = ICE_ITR_NONE; u32 val; @@ -821,25 +821,6 @@ static inline bool ice_is_switchdev_running(struct ice_pf *pf) return pf->switchdev.is_running; } -/** - * ice_set_sriov_cap - enable SRIOV in PF flags - * @pf: PF struct - */ -static inline void ice_set_sriov_cap(struct ice_pf *pf) -{ - if (pf->hw.func_caps.common_cap.sr_iov_1_1) - set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); -} - -/** - * ice_clear_sriov_cap - disable SRIOV in PF flags - * @pf: PF struct - */ -static inline void ice_clear_sriov_cap(struct ice_pf *pf) -{ - clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); -} - #define ICE_FD_STAT_CTR_BLOCK_COUNT 256 #define ICE_FD_STAT_PF_IDX(base_idx) \ ((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 838d9b274d68..63d3e1dcbba5 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1087,7 +1087,7 @@ struct ice_aqc_get_phy_caps { #define ICE_PHY_TYPE_HIGH_100G_CAUI2 BIT_ULL(2) #define ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC BIT_ULL(3) #define ICE_PHY_TYPE_HIGH_100G_AUI2 BIT_ULL(4) -#define ICE_PHY_TYPE_HIGH_MAX_INDEX 5 +#define ICE_PHY_TYPE_HIGH_MAX_INDEX 4 struct ice_aqc_get_phy_caps_data { __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index fba178e07600..cca0e753f38f 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -596,7 +596,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) { struct net_device *netdev; struct ice_pf *pf; - int base_idx, i; + int i; if (!vsi || vsi->type != ICE_VSI_PF) return 0; @@ -613,10 +613,9 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) if (unlikely(!netdev->rx_cpu_rmap)) return -EINVAL; - base_idx = vsi->base_vector; ice_for_each_q_vector(vsi, i) if (irq_cpu_rmap_add(netdev->rx_cpu_rmap, - pf->msix_entries[base_idx + i].vector)) { + vsi->q_vectors[i]->irq.virq)) { ice_free_cpu_rx_rmap(vsi); return -EINVAL; } diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 1911d644dfa8..074bf9403cd1 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -103,10 +103,10 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) { struct ice_pf *pf = vsi->back; struct ice_q_vector *q_vector; + int err; /* allocate q_vector */ - q_vector = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*q_vector), - GFP_KERNEL); + q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL); if (!q_vector) return -ENOMEM; @@ -118,9 +118,34 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) q_vector->rx.itr_mode = ITR_DYNAMIC; q_vector->tx.type = ICE_TX_CONTAINER; q_vector->rx.type = ICE_RX_CONTAINER; + q_vector->irq.index = -ENOENT; - if (vsi->type == ICE_VSI_VF) + if (vsi->type == ICE_VSI_VF) { + q_vector->reg_idx = ice_calc_vf_reg_idx(vsi->vf, q_vector); goto out; + } else if (vsi->type == ICE_VSI_CTRL && vsi->vf) { + struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi); + + if (ctrl_vsi) { + if (unlikely(!ctrl_vsi->q_vectors)) { + err = -ENOENT; + goto err_free_q_vector; + } + + q_vector->irq = ctrl_vsi->q_vectors[0]->irq; + goto skip_alloc; + } + } + + q_vector->irq = ice_alloc_irq(pf, vsi->irq_dyn_alloc); + if (q_vector->irq.index < 0) { + err = -ENOMEM; + goto err_free_q_vector; + } + +skip_alloc: + q_vector->reg_idx = q_vector->irq.index; + /* only set affinity_mask if the CPU is online */ if (cpu_online(v_idx)) cpumask_set_cpu(v_idx, &q_vector->affinity_mask); @@ -137,6 +162,11 @@ out: vsi->q_vectors[v_idx] = q_vector; return 0; + +err_free_q_vector: + kfree(q_vector); + + return err; } /** @@ -168,7 +198,19 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) if (vsi->netdev) netif_napi_del(&q_vector->napi); - devm_kfree(dev, q_vector); + /* release MSIX interrupt if q_vector had interrupt allocated */ + if (q_vector->irq.index < 0) + goto free_q_vector; + + /* only free last VF ctrl vsi interrupt */ + if (vsi->type == ICE_VSI_CTRL && vsi->vf && + ice_get_vf_ctrl_vsi(pf, vsi)) + goto free_q_vector; + + ice_free_irq(pf, q_vector->irq); + +free_q_vector: + kfree(q_vector); vsi->q_vectors[v_idx] = NULL; } @@ -393,7 +435,8 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) /* Receive Packet Data Buffer Size. * The Packet Data Buffer Size is defined in 128 byte units. */ - rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S; + rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len, + BIT_ULL(ICE_RLAN_CTX_DBUF_S)); /* use 32 byte descriptors */ rlan_ctx.dsize = 1; @@ -758,6 +801,8 @@ void ice_vsi_free_q_vectors(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, v_idx) ice_free_q_vector(vsi, v_idx); + + vsi->num_q_vectors = 0; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index eb2dc0983776..e16d4c83ed5f 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -814,8 +814,7 @@ static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw) devm_kfree(ice_hw_to_dev(hw), lst_itr); } } - if (recps[i].root_buf) - devm_kfree(ice_hw_to_dev(hw), recps[i].root_buf); + devm_kfree(ice_hw_to_dev(hw), recps[i].root_buf); } ice_rm_all_sw_replay_rule_info(hw); devm_kfree(ice_hw_to_dev(hw), sw->recp_list); @@ -834,7 +833,7 @@ static int ice_get_fw_log_cfg(struct ice_hw *hw) u16 size; size = sizeof(*config) * ICE_AQC_FW_LOG_ID_MAX; - config = devm_kzalloc(ice_hw_to_dev(hw), size, GFP_KERNEL); + config = kzalloc(size, GFP_KERNEL); if (!config) return -ENOMEM; @@ -857,7 +856,7 @@ static int ice_get_fw_log_cfg(struct ice_hw *hw) } } - devm_kfree(ice_hw_to_dev(hw), config); + kfree(config); return status; } @@ -1011,8 +1010,7 @@ static int ice_cfg_fw_log(struct ice_hw *hw, bool enable) } out: - if (data) - devm_kfree(ice_hw_to_dev(hw), data); + devm_kfree(ice_hw_to_dev(hw), data); return status; } diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index d2faf1baad2f..e7d2474c431c 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -339,8 +339,7 @@ do { \ } \ } \ /* free the buffer info list */ \ - if ((qi)->ring.cmd_buf) \ - devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf); \ + devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf); \ /* free DMA head */ \ devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head); \ } while (0) @@ -1056,14 +1055,19 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, if (cq->sq.next_to_use == cq->sq.count) cq->sq.next_to_use = 0; wr32(hw, cq->sq.tail, cq->sq.next_to_use); + ice_flush(hw); + + /* Wait a short time before initial ice_sq_done() check, to allow + * hardware time for completion. + */ + udelay(5); timeout = jiffies + ICE_CTL_Q_SQ_CMD_TIMEOUT; do { if (ice_sq_done(hw, cq)) break; - usleep_range(ICE_CTL_Q_SQ_CMD_USEC, - ICE_CTL_Q_SQ_CMD_USEC * 3 / 2); + usleep_range(100, 150); } while (time_before(jiffies, timeout)); /* if ready, copy the desc back to temp */ diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index 950b7f4a7a05..8f2fd1613a95 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -35,7 +35,6 @@ enum ice_ctl_q { /* Control Queue timeout settings - max delay 1s */ #define ICE_CTL_Q_SQ_CMD_TIMEOUT HZ /* Wait max 1s */ -#define ICE_CTL_Q_SQ_CMD_USEC 100 /* Check every 100usec */ #define ICE_CTL_Q_ADMIN_INIT_TIMEOUT 10 /* Count 10 times */ #define ICE_CTL_Q_ADMIN_INIT_MSEC 100 /* Check every 100msec */ diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.h b/drivers/net/ethernet/intel/ice/ice_ddp.h index 37eadb3d27a8..41acfe26df1c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.h +++ b/drivers/net/ethernet/intel/ice/ice_ddp.h @@ -185,7 +185,7 @@ struct ice_buf_hdr { #define ICE_MAX_ENTRIES_IN_BUF(hd_sz, ent_sz) \ ((ICE_PKG_BUF_SIZE - \ - struct_size((struct ice_buf_hdr *)0, section_entry, 1) - (hd_sz)) / \ + struct_size_t(struct ice_buf_hdr, section_entry, 1) - (hd_sz)) / \ (ent_sz)) /* ice package section IDs */ @@ -297,7 +297,7 @@ struct ice_label_section { }; #define ICE_MAX_LABELS_IN_BUF \ - ICE_MAX_ENTRIES_IN_BUF(struct_size((struct ice_label_section *)0, \ + ICE_MAX_ENTRIES_IN_BUF(struct_size_t(struct ice_label_section, \ label, 1) - \ sizeof(struct ice_label), \ sizeof(struct ice_label)) @@ -352,7 +352,7 @@ struct ice_boost_tcam_section { }; #define ICE_MAX_BST_TCAMS_IN_BUF \ - ICE_MAX_ENTRIES_IN_BUF(struct_size((struct ice_boost_tcam_section *)0, \ + ICE_MAX_ENTRIES_IN_BUF(struct_size_t(struct ice_boost_tcam_section, \ tcam, 1) - \ sizeof(struct ice_boost_tcam_entry), \ sizeof(struct ice_boost_tcam_entry)) @@ -372,8 +372,7 @@ struct ice_marker_ptype_tcam_section { }; #define ICE_MAX_MARKER_PTYPE_TCAMS_IN_BUF \ - ICE_MAX_ENTRIES_IN_BUF( \ - struct_size((struct ice_marker_ptype_tcam_section *)0, tcam, \ + ICE_MAX_ENTRIES_IN_BUF(struct_size_t(struct ice_marker_ptype_tcam_section, tcam, \ 1) - \ sizeof(struct ice_marker_ptype_tcam_entry), \ sizeof(struct ice_marker_ptype_tcam_entry)) diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index bc44cc220818..80dc5445b50d 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -1256,8 +1256,6 @@ static const struct devlink_ops ice_devlink_ops = { BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), .reload_down = ice_devlink_reload_down, .reload_up = ice_devlink_reload_up, - .port_split = ice_devlink_port_split, - .port_unsplit = ice_devlink_port_unsplit, .eswitch_mode_get = ice_eswitch_mode_get, .eswitch_mode_set = ice_eswitch_mode_set, .info_get = ice_devlink_info_get, @@ -1512,6 +1510,11 @@ ice_devlink_set_port_split_options(struct ice_pf *pf, ice_active_port_option = active_idx; } +static const struct devlink_port_ops ice_devlink_port_ops = { + .port_split = ice_devlink_port_split, + .port_unsplit = ice_devlink_port_unsplit, +}; + /** * ice_devlink_create_pf_port - Create a devlink port for this PF * @pf: the PF to create a devlink port for @@ -1551,7 +1554,8 @@ int ice_devlink_create_pf_port(struct ice_pf *pf) devlink_port_attrs_set(devlink_port, &attrs); devlink = priv_to_devlink(pf); - err = devlink_port_register(devlink, devlink_port, vsi->idx); + err = devlink_port_register_with_ops(devlink, devlink_port, vsi->idx, + &ice_devlink_port_ops); if (err) { dev_err(dev, "Failed to create devlink port for PF %d, error %d\n", pf->hw.pf_id, err); diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index f6dd3f8fd936..459e32f6adb5 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -10,16 +10,15 @@ #include "ice_tc_lib.h" /** - * ice_eswitch_add_vf_mac_rule - add adv rule with VF's MAC + * ice_eswitch_add_vf_sp_rule - add adv rule with VF's VSI index * @pf: pointer to PF struct * @vf: pointer to VF struct - * @mac: VF's MAC address * * This function adds advanced rule that forwards packets with - * VF's MAC address (src MAC) to the corresponding switchdev ctrl VSI queue. + * VF's VSI index to the corresponding switchdev ctrl VSI queue. */ -int -ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, const u8 *mac) +static int +ice_eswitch_add_vf_sp_rule(struct ice_pf *pf, struct ice_vf *vf) { struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; struct ice_adv_rule_info rule_info = { 0 }; @@ -32,76 +31,41 @@ ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, const u8 *mac) if (!list) return -ENOMEM; - list[0].type = ICE_MAC_OFOS; - ether_addr_copy(list[0].h_u.eth_hdr.src_addr, mac); - eth_broadcast_addr(list[0].m_u.eth_hdr.src_addr); + ice_rule_add_src_vsi_metadata(list); - rule_info.sw_act.flag |= ICE_FLTR_TX; + rule_info.sw_act.flag = ICE_FLTR_TX; rule_info.sw_act.vsi_handle = ctrl_vsi->idx; rule_info.sw_act.fltr_act = ICE_FWD_TO_Q; - rule_info.rx = false; rule_info.sw_act.fwd_id.q_id = hw->func_caps.common_cap.rxq_first_id + ctrl_vsi->rxq_map[vf->vf_id]; rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE; rule_info.flags_info.act_valid = true; rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN; + rule_info.src_vsi = vf->lan_vsi_idx; err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, - vf->repr->mac_rule); + &vf->repr->sp_rule); if (err) - dev_err(ice_pf_to_dev(pf), "Unable to add VF mac rule in switchdev mode for VF %d", + dev_err(ice_pf_to_dev(pf), "Unable to add VF slow-path rule in switchdev mode for VF %d", vf->vf_id); - else - vf->repr->rule_added = true; kfree(list); return err; } /** - * ice_eswitch_replay_vf_mac_rule - replay adv rule with VF's MAC - * @vf: pointer to vF struct - * - * This function replays VF's MAC rule after reset. - */ -void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf) -{ - int err; - - if (!ice_is_switchdev_running(vf->pf)) - return; - - if (is_valid_ether_addr(vf->hw_lan_addr)) { - err = ice_eswitch_add_vf_mac_rule(vf->pf, vf, - vf->hw_lan_addr); - if (err) { - dev_err(ice_pf_to_dev(vf->pf), "Failed to add MAC %pM for VF %d\n, error %d\n", - vf->hw_lan_addr, vf->vf_id, err); - return; - } - vf->num_mac++; - - ether_addr_copy(vf->dev_lan_addr, vf->hw_lan_addr); - } -} - -/** - * ice_eswitch_del_vf_mac_rule - delete adv rule with VF's MAC + * ice_eswitch_del_vf_sp_rule - delete adv rule with VF's VSI index * @vf: pointer to the VF struct * - * Delete the advanced rule that was used to forward packets with the VF's MAC - * address (src MAC) to the corresponding switchdev ctrl VSI queue. + * Delete the advanced rule that was used to forward packets with the VF's VSI + * index to the corresponding switchdev ctrl VSI queue. */ -void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf) +static void ice_eswitch_del_vf_sp_rule(struct ice_vf *vf) { - if (!ice_is_switchdev_running(vf->pf)) - return; - - if (!vf->repr->rule_added) + if (!vf->repr) return; - ice_rem_adv_rule_by_id(&vf->pf->hw, vf->repr->mac_rule); - vf->repr->rule_added = false; + ice_rem_adv_rule_by_id(&vf->pf->hw, &vf->repr->sp_rule); } /** @@ -237,6 +201,7 @@ ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi) ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); metadata_dst_free(vf->repr->dst); vf->repr->dst = NULL; + ice_eswitch_del_vf_sp_rule(vf); ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI); @@ -264,25 +229,30 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) vf->repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL); if (!vf->repr->dst) { - ice_fltr_add_mac_and_broadcast(vsi, - vf->hw_lan_addr, + ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, + ICE_FWD_TO_VSI); + goto err; + } + + if (ice_eswitch_add_vf_sp_rule(pf, vf)) { + ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI); goto err; } if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof)) { - ice_fltr_add_mac_and_broadcast(vsi, - vf->hw_lan_addr, + ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI); + ice_eswitch_del_vf_sp_rule(vf); metadata_dst_free(vf->repr->dst); vf->repr->dst = NULL; goto err; } if (ice_vsi_add_vlan_zero(vsi)) { - ice_fltr_add_mac_and_broadcast(vsi, - vf->hw_lan_addr, + ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI); + ice_eswitch_del_vf_sp_rule(vf); metadata_dst_free(vf->repr->dst); vf->repr->dst = NULL; ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); @@ -361,6 +331,9 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) np = netdev_priv(netdev); vsi = np->vsi; + if (!vsi || !ice_is_switchdev_running(vsi->back)) + return NETDEV_TX_BUSY; + if (ice_is_reset_in_progress(vsi->back->state) || test_bit(ICE_VF_DIS, vsi->back->state)) return NETDEV_TX_BUSY; @@ -568,6 +541,12 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, break; case DEVLINK_ESWITCH_MODE_SWITCHDEV: { + if (ice_is_adq_active(pf)) { + dev_err(ice_pf_to_dev(pf), "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root"); + NL_SET_ERR_MSG_MOD(extack, "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root"); + return -EOPNOTSUPP; + } + dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev", pf->hw.pf_id); NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev"); diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h index 6a413331572b..b18bf83a2f5b 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.h +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -20,11 +20,6 @@ bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf); void ice_eswitch_update_repr(struct ice_vsi *vsi); void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf); -int -ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, - const u8 *mac); -void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf); -void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf); void ice_eswitch_set_target_vsi(struct sk_buff *skb, struct ice_tx_offload_params *off); @@ -34,15 +29,6 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev); static inline void ice_eswitch_release(struct ice_pf *pf) { } static inline void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { } -static inline void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf) { } -static inline void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf) { } - -static inline int -ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, - const u8 *mac) -{ - return -EOPNOTSUPP; -} static inline void ice_eswitch_set_target_vsi(struct sk_buff *skb, diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index f86e814354a3..ad4d4702129f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4,6 +4,7 @@ /* ethtool support for ice */ #include "ice.h" +#include "ice_ethtool.h" #include "ice_flow.h" #include "ice_fltr.h" #include "ice_lib.h" @@ -956,7 +957,7 @@ static u64 ice_intr_test(struct net_device *netdev) netdev_info(netdev, "interrupt test\n"); - wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_idx), + wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_irq.index), GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M | GLINT_DYN_CTL_SWINT_TRIG_M); @@ -1658,15 +1659,26 @@ ice_mask_min_supported_speeds(struct ice_hw *hw, *phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_1G; } -#define ice_ethtool_advertise_link_mode(aq_link_speed, ethtool_link_mode) \ - do { \ - if (req_speeds & (aq_link_speed) || \ - (!req_speeds && \ - (advert_phy_type_lo & phy_type_mask_lo || \ - advert_phy_type_hi & phy_type_mask_hi))) \ - ethtool_link_ksettings_add_link_mode(ks, advertising,\ - ethtool_link_mode); \ - } while (0) +/** + * ice_linkmode_set_bit - set link mode bit + * @phy_to_ethtool: PHY type to ethtool link mode struct to set + * @ks: ethtool link ksettings struct to fill out + * @req_speeds: speed requested by user + * @advert_phy_type: advertised PHY type + * @phy_type: PHY type + */ +static void +ice_linkmode_set_bit(const struct ice_phy_type_to_ethtool *phy_to_ethtool, + struct ethtool_link_ksettings *ks, u32 req_speeds, + u64 advert_phy_type, u32 phy_type) +{ + linkmode_set_bit(phy_to_ethtool->link_mode, ks->link_modes.supported); + + if (req_speeds & phy_to_ethtool->aq_link_speed || + (!req_speeds && advert_phy_type & BIT(phy_type))) + linkmode_set_bit(phy_to_ethtool->link_mode, + ks->link_modes.advertising); +} /** * ice_phy_type_to_ethtool - convert the phy_types to ethtool link modes @@ -1682,11 +1694,10 @@ ice_phy_type_to_ethtool(struct net_device *netdev, struct ice_pf *pf = vsi->back; u64 advert_phy_type_lo = 0; u64 advert_phy_type_hi = 0; - u64 phy_type_mask_lo = 0; - u64 phy_type_mask_hi = 0; u64 phy_types_high = 0; u64 phy_types_low = 0; - u16 req_speeds; + u32 req_speeds; + u32 i; req_speeds = vsi->port_info->phy.link_info.req_speeds; @@ -1743,272 +1754,22 @@ ice_phy_type_to_ethtool(struct net_device *netdev, advert_phy_type_hi = vsi->port_info->phy.phy_type_high; } - ethtool_link_ksettings_zero_link_mode(ks, supported); - ethtool_link_ksettings_zero_link_mode(ks, advertising); - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_100BASE_TX | - ICE_PHY_TYPE_LOW_100M_SGMII; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100baseT_Full); - - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100MB, - 100baseT_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_T | - ICE_PHY_TYPE_LOW_1G_SGMII; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseT_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB, - 1000baseT_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_KX; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseKX_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB, - 1000baseKX_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_SX | - ICE_PHY_TYPE_LOW_1000BASE_LX; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseX_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB, - 1000baseX_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_2500BASE_T; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 2500baseT_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_2500MB, - 2500baseT_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_2500BASE_X | - ICE_PHY_TYPE_LOW_2500BASE_KX; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 2500baseX_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_2500MB, - 2500baseX_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_5GBASE_T | - ICE_PHY_TYPE_LOW_5GBASE_KR; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 5000baseT_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_5GB, - 5000baseT_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_T | - ICE_PHY_TYPE_LOW_10G_SFI_DA | - ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC | - ICE_PHY_TYPE_LOW_10G_SFI_C2C; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseT_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB, - 10000baseT_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_KR_CR1; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseKR_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB, - 10000baseKR_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_SR; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseSR_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB, - 10000baseSR_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_LR; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseLR_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB, - 10000baseLR_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_T | - ICE_PHY_TYPE_LOW_25GBASE_CR | - ICE_PHY_TYPE_LOW_25GBASE_CR_S | - ICE_PHY_TYPE_LOW_25GBASE_CR1 | - ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC | - ICE_PHY_TYPE_LOW_25G_AUI_C2C; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseCR_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB, - 25000baseCR_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_SR | - ICE_PHY_TYPE_LOW_25GBASE_LR; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseSR_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB, - 25000baseSR_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_KR | - ICE_PHY_TYPE_LOW_25GBASE_KR_S | - ICE_PHY_TYPE_LOW_25GBASE_KR1; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseKR_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB, - 25000baseKR_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_KR4; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseKR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB, - 40000baseKR4_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_CR4 | - ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC | - ICE_PHY_TYPE_LOW_40G_XLAUI; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseCR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB, - 40000baseCR4_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_SR4; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseSR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB, - 40000baseSR4_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_LR4; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseLR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB, - 40000baseLR4_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_CR2 | - ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC | - ICE_PHY_TYPE_LOW_50G_LAUI2 | - ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC | - ICE_PHY_TYPE_LOW_50G_AUI2 | - ICE_PHY_TYPE_LOW_50GBASE_CP | - ICE_PHY_TYPE_LOW_50GBASE_SR | - ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC | - ICE_PHY_TYPE_LOW_50G_AUI1; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseCR2_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB, - 50000baseCR2_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_KR2 | - ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseKR2_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB, - 50000baseKR2_Full); - } + linkmode_zero(ks->link_modes.supported); + linkmode_zero(ks->link_modes.advertising); - phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_SR2 | - ICE_PHY_TYPE_LOW_50GBASE_LR2 | - ICE_PHY_TYPE_LOW_50GBASE_FR | - ICE_PHY_TYPE_LOW_50GBASE_LR; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseSR2_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB, - 50000baseSR2_Full); + for (i = 0; i < BITS_PER_TYPE(u64); i++) { + if (phy_types_low & BIT_ULL(i)) + ice_linkmode_set_bit(&phy_type_low_lkup[i], ks, + req_speeds, advert_phy_type_lo, + i); } - phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_CR4 | - ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | - ICE_PHY_TYPE_LOW_100G_CAUI4 | - ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | - ICE_PHY_TYPE_LOW_100G_AUI4 | - ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4; - phy_type_mask_hi = ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC | - ICE_PHY_TYPE_HIGH_100G_CAUI2 | - ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | - ICE_PHY_TYPE_HIGH_100G_AUI2; - if (phy_types_low & phy_type_mask_lo || - phy_types_high & phy_type_mask_hi) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseCR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 100000baseCR4_Full); + for (i = 0; i < BITS_PER_TYPE(u64); i++) { + if (phy_types_high & BIT_ULL(i)) + ice_linkmode_set_bit(&phy_type_high_lkup[i], ks, + req_speeds, advert_phy_type_hi, + i); } - - if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseCR2_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 100000baseCR2_Full); - } - - if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR4) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseSR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 100000baseSR4_Full); - } - - if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR2) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseSR2_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 100000baseSR2_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_LR4 | - ICE_PHY_TYPE_LOW_100GBASE_DR; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseLR4_ER4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 100000baseLR4_ER4_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_KR4 | - ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseKR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 100000baseKR4_Full); - } - - if (phy_types_high & ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseKR2_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 100000baseKR2_Full); - } - } #define TEST_SET_BITS_TIMEOUT 50 @@ -2920,8 +2681,13 @@ ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, ring->rx_max_pending = ICE_MAX_NUM_DESC; ring->tx_max_pending = ICE_MAX_NUM_DESC; - ring->rx_pending = vsi->rx_rings[0]->count; - ring->tx_pending = vsi->tx_rings[0]->count; + if (vsi->tx_rings && vsi->rx_rings) { + ring->rx_pending = vsi->rx_rings[0]->count; + ring->tx_pending = vsi->tx_rings[0]->count; + } else { + ring->rx_pending = 0; + ring->tx_pending = 0; + } /* Rx mini and jumbo rings are not supported */ ring->rx_mini_max_pending = 0; @@ -2955,6 +2721,10 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, return -EINVAL; } + /* Return if there is no rings (device is reloading) */ + if (!vsi->tx_rings || !vsi->rx_rings) + return -EBUSY; + new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE); if (new_tx_cnt != ring->tx_pending) netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n", diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.h b/drivers/net/ethernet/intel/ice/ice_ethtool.h new file mode 100644 index 000000000000..b403ee79cd5e --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _ICE_ETHTOOL_H_ +#define _ICE_ETHTOOL_H_ + +struct ice_phy_type_to_ethtool { + u64 aq_link_speed; + u8 link_mode; +}; + +/* Macro to make PHY type to Ethtool link mode table entry. + * The index is the PHY type. + */ +#define ICE_PHY_TYPE(LINK_SPEED, ETHTOOL_LINK_MODE) {\ + .aq_link_speed = ICE_AQ_LINK_SPEED_##LINK_SPEED, \ + .link_mode = ETHTOOL_LINK_MODE_##ETHTOOL_LINK_MODE##_BIT, \ +} + +/* Lookup table mapping PHY type low to link speed and Ethtool link modes. + * Array index corresponds to HW PHY type bit, see + * ice_adminq_cmd.h:ICE_PHY_TYPE_LOW_*. + */ +static const struct ice_phy_type_to_ethtool +phy_type_low_lkup[] = { + [0] = ICE_PHY_TYPE(100MB, 100baseT_Full), + [1] = ICE_PHY_TYPE(100MB, 100baseT_Full), + [2] = ICE_PHY_TYPE(1000MB, 1000baseT_Full), + [3] = ICE_PHY_TYPE(1000MB, 1000baseX_Full), + [4] = ICE_PHY_TYPE(1000MB, 1000baseX_Full), + [5] = ICE_PHY_TYPE(1000MB, 1000baseKX_Full), + [6] = ICE_PHY_TYPE(1000MB, 1000baseT_Full), + [7] = ICE_PHY_TYPE(2500MB, 2500baseT_Full), + [8] = ICE_PHY_TYPE(2500MB, 2500baseX_Full), + [9] = ICE_PHY_TYPE(2500MB, 2500baseX_Full), + [10] = ICE_PHY_TYPE(5GB, 5000baseT_Full), + [11] = ICE_PHY_TYPE(5GB, 5000baseT_Full), + [12] = ICE_PHY_TYPE(10GB, 10000baseT_Full), + [13] = ICE_PHY_TYPE(10GB, 10000baseCR_Full), + [14] = ICE_PHY_TYPE(10GB, 10000baseSR_Full), + [15] = ICE_PHY_TYPE(10GB, 10000baseLR_Full), + [16] = ICE_PHY_TYPE(10GB, 10000baseKR_Full), + [17] = ICE_PHY_TYPE(10GB, 10000baseCR_Full), + [18] = ICE_PHY_TYPE(10GB, 10000baseKR_Full), + [19] = ICE_PHY_TYPE(25GB, 25000baseCR_Full), + [20] = ICE_PHY_TYPE(25GB, 25000baseCR_Full), + [21] = ICE_PHY_TYPE(25GB, 25000baseCR_Full), + [22] = ICE_PHY_TYPE(25GB, 25000baseCR_Full), + [23] = ICE_PHY_TYPE(25GB, 25000baseSR_Full), + [24] = ICE_PHY_TYPE(25GB, 25000baseSR_Full), + [25] = ICE_PHY_TYPE(25GB, 25000baseKR_Full), + [26] = ICE_PHY_TYPE(25GB, 25000baseKR_Full), + [27] = ICE_PHY_TYPE(25GB, 25000baseKR_Full), + [28] = ICE_PHY_TYPE(25GB, 25000baseSR_Full), + [29] = ICE_PHY_TYPE(25GB, 25000baseCR_Full), + [30] = ICE_PHY_TYPE(40GB, 40000baseCR4_Full), + [31] = ICE_PHY_TYPE(40GB, 40000baseSR4_Full), + [32] = ICE_PHY_TYPE(40GB, 40000baseLR4_Full), + [33] = ICE_PHY_TYPE(40GB, 40000baseKR4_Full), + [34] = ICE_PHY_TYPE(40GB, 40000baseSR4_Full), + [35] = ICE_PHY_TYPE(40GB, 40000baseCR4_Full), + [36] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [37] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), + [38] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), + [39] = ICE_PHY_TYPE(50GB, 50000baseKR2_Full), + [40] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), + [41] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [42] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), + [43] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [44] = ICE_PHY_TYPE(50GB, 50000baseCR_Full), + [45] = ICE_PHY_TYPE(50GB, 50000baseSR_Full), + [46] = ICE_PHY_TYPE(50GB, 50000baseLR_ER_FR_Full), + [47] = ICE_PHY_TYPE(50GB, 50000baseLR_ER_FR_Full), + [48] = ICE_PHY_TYPE(50GB, 50000baseKR_Full), + [49] = ICE_PHY_TYPE(50GB, 50000baseSR_Full), + [50] = ICE_PHY_TYPE(50GB, 50000baseCR_Full), + [51] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [52] = ICE_PHY_TYPE(100GB, 100000baseSR4_Full), + [53] = ICE_PHY_TYPE(100GB, 100000baseLR4_ER4_Full), + [54] = ICE_PHY_TYPE(100GB, 100000baseKR4_Full), + [55] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [56] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [57] = ICE_PHY_TYPE(100GB, 100000baseSR4_Full), + [58] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [59] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [60] = ICE_PHY_TYPE(100GB, 100000baseKR4_Full), + [61] = ICE_PHY_TYPE(100GB, 100000baseCR2_Full), + [62] = ICE_PHY_TYPE(100GB, 100000baseSR2_Full), + [63] = ICE_PHY_TYPE(100GB, 100000baseLR4_ER4_Full), +}; + +/* Lookup table mapping PHY type high to link speed and Ethtool link modes. + * Array index corresponds to HW PHY type bit, see + * ice_adminq_cmd.h:ICE_PHY_TYPE_HIGH_* + */ +static const struct ice_phy_type_to_ethtool +phy_type_high_lkup[] = { + [0] = ICE_PHY_TYPE(100GB, 100000baseKR2_Full), + [1] = ICE_PHY_TYPE(100GB, 100000baseSR2_Full), + [2] = ICE_PHY_TYPE(100GB, 100000baseCR2_Full), + [3] = ICE_PHY_TYPE(100GB, 100000baseSR2_Full), + [4] = ICE_PHY_TYPE(100GB, 100000baseCR2_Full), +}; + +#endif /* !_ICE_ETHTOOL_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index ead6d50fc0ad..8c6e13f87b7d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1281,16 +1281,21 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp, ICE_FLOW_FLD_OFF_INVAL); } - /* add filter for outer headers */ fltr_idx = ice_ethtool_flow_to_fltr(fsp->flow_type & ~FLOW_EXT); + + assign_bit(fltr_idx, hw->fdir_perfect_fltr, perfect_filter); + + /* add filter for outer headers */ ret = ice_fdir_set_hw_fltr_rule(pf, seg, fltr_idx, ICE_FD_HW_SEG_NON_TUN); - if (ret == -EEXIST) - /* Rule already exists, free memory and continue */ - devm_kfree(dev, seg); - else if (ret) + if (ret == -EEXIST) { + /* Rule already exists, free memory and count as success */ + ret = 0; + goto err_exit; + } else if (ret) { /* could not write filter, free memory */ goto err_exit; + } /* make tunneled filter HW entries if possible */ memcpy(&tun_seg[1], seg, sizeof(*seg)); @@ -1305,18 +1310,13 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp, devm_kfree(dev, tun_seg); } - if (perfect_filter) - set_bit(fltr_idx, hw->fdir_perfect_fltr); - else - clear_bit(fltr_idx, hw->fdir_perfect_fltr); - return ret; err_exit: devm_kfree(dev, tun_seg); devm_kfree(dev, seg); - return -EOPNOTSUPP; + return ret; } /** @@ -1914,7 +1914,9 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd) input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL; /* input struct is added to the HW filter list */ - ice_fdir_update_list_entry(pf, input, fsp->location); + ret = ice_fdir_update_list_entry(pf, input, fsp->location); + if (ret) + goto release_lock; ret = ice_fdir_write_all_fltr(pf, input, true); if (ret) diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c index ef103e47a8dc..85cca572c22a 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.c +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -1304,23 +1304,6 @@ ice_flow_find_prof_id(struct ice_hw *hw, enum ice_block blk, u64 prof_id) } /** - * ice_dealloc_flow_entry - Deallocate flow entry memory - * @hw: pointer to the HW struct - * @entry: flow entry to be removed - */ -static void -ice_dealloc_flow_entry(struct ice_hw *hw, struct ice_flow_entry *entry) -{ - if (!entry) - return; - - if (entry->entry) - devm_kfree(ice_hw_to_dev(hw), entry->entry); - - devm_kfree(ice_hw_to_dev(hw), entry); -} - -/** * ice_flow_rem_entry_sync - Remove a flow entry * @hw: pointer to the HW struct * @blk: classification stage @@ -1335,7 +1318,8 @@ ice_flow_rem_entry_sync(struct ice_hw *hw, enum ice_block __always_unused blk, list_del(&entry->l_entry); - ice_dealloc_flow_entry(hw, entry); + devm_kfree(ice_hw_to_dev(hw), entry->entry); + devm_kfree(ice_hw_to_dev(hw), entry); return 0; } @@ -1662,8 +1646,7 @@ ice_flow_add_entry(struct ice_hw *hw, enum ice_block blk, u64 prof_id, out: if (status && e) { - if (e->entry) - devm_kfree(ice_hw_to_dev(hw), e->entry); + devm_kfree(ice_hw_to_dev(hw), e->entry); devm_kfree(ice_hw_to_dev(hw), e); } diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index e6bc2285071e..145b27f2a4ce 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -229,20 +229,34 @@ void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos) EXPORT_SYMBOL_GPL(ice_get_qos_params); /** - * ice_reserve_rdma_qvector - Reserve vector resources for RDMA driver + * ice_alloc_rdma_qvectors - Allocate vector resources for RDMA driver * @pf: board private structure to initialize */ -static int ice_reserve_rdma_qvector(struct ice_pf *pf) +static int ice_alloc_rdma_qvectors(struct ice_pf *pf) { if (ice_is_rdma_ena(pf)) { - int index; - - index = ice_get_res(pf, pf->irq_tracker, pf->num_rdma_msix, - ICE_RES_RDMA_VEC_ID); - if (index < 0) - return index; - pf->num_avail_sw_msix -= pf->num_rdma_msix; - pf->rdma_base_vector = (u16)index; + int i; + + pf->msix_entries = kcalloc(pf->num_rdma_msix, + sizeof(*pf->msix_entries), + GFP_KERNEL); + if (!pf->msix_entries) + return -ENOMEM; + + /* RDMA is the only user of pf->msix_entries array */ + pf->rdma_base_vector = 0; + + for (i = 0; i < pf->num_rdma_msix; i++) { + struct msix_entry *entry = &pf->msix_entries[i]; + struct msi_map map; + + map = ice_alloc_irq(pf, false); + if (map.index < 0) + break; + + entry->entry = map.index; + entry->vector = map.virq; + } } return 0; } @@ -253,9 +267,21 @@ static int ice_reserve_rdma_qvector(struct ice_pf *pf) */ static void ice_free_rdma_qvector(struct ice_pf *pf) { - pf->num_avail_sw_msix -= pf->num_rdma_msix; - ice_free_res(pf->irq_tracker, pf->rdma_base_vector, - ICE_RES_RDMA_VEC_ID); + int i; + + if (!pf->msix_entries) + return; + + for (i = 0; i < pf->num_rdma_msix; i++) { + struct msi_map map; + + map.index = pf->msix_entries[i].entry; + map.virq = pf->msix_entries[i].vector; + ice_free_irq(pf, map); + } + + kfree(pf->msix_entries); + pf->msix_entries = NULL; } /** @@ -357,7 +383,7 @@ int ice_init_rdma(struct ice_pf *pf) } /* Reserve vector resources */ - ret = ice_reserve_rdma_qvector(pf); + ret = ice_alloc_rdma_qvectors(pf); if (ret < 0) { dev_err(dev, "failed to reserve vectors for RDMA\n"); goto err_reserve_rdma_qvector; diff --git a/drivers/net/ethernet/intel/ice/ice_irq.c b/drivers/net/ethernet/intel/ice/ice_irq.c new file mode 100644 index 000000000000..ad82ff7d1995 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_irq.c @@ -0,0 +1,378 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023, Intel Corporation. */ + +#include "ice.h" +#include "ice_lib.h" +#include "ice_irq.h" + +/** + * ice_init_irq_tracker - initialize interrupt tracker + * @pf: board private structure + * @max_vectors: maximum number of vectors that tracker can hold + * @num_static: number of preallocated interrupts + */ +static void +ice_init_irq_tracker(struct ice_pf *pf, unsigned int max_vectors, + unsigned int num_static) +{ + pf->irq_tracker.num_entries = max_vectors; + pf->irq_tracker.num_static = num_static; + xa_init_flags(&pf->irq_tracker.entries, XA_FLAGS_ALLOC); +} + +/** + * ice_deinit_irq_tracker - free xarray tracker + * @pf: board private structure + */ +static void ice_deinit_irq_tracker(struct ice_pf *pf) +{ + xa_destroy(&pf->irq_tracker.entries); +} + +/** + * ice_free_irq_res - free a block of resources + * @pf: board private structure + * @index: starting index previously returned by ice_get_res + */ +static void ice_free_irq_res(struct ice_pf *pf, u16 index) +{ + struct ice_irq_entry *entry; + + entry = xa_erase(&pf->irq_tracker.entries, index); + kfree(entry); +} + +/** + * ice_get_irq_res - get an interrupt resource + * @pf: board private structure + * @dyn_only: force entry to be dynamically allocated + * + * Allocate new irq entry in the free slot of the tracker. Since xarray + * is used, always allocate new entry at the lowest possible index. Set + * proper allocation limit for maximum tracker entries. + * + * Returns allocated irq entry or NULL on failure. + */ +static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, bool dyn_only) +{ + struct xa_limit limit = { .max = pf->irq_tracker.num_entries, + .min = 0 }; + unsigned int num_static = pf->irq_tracker.num_static; + struct ice_irq_entry *entry; + unsigned int index; + int ret; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + + /* skip preallocated entries if the caller says so */ + if (dyn_only) + limit.min = num_static; + + ret = xa_alloc(&pf->irq_tracker.entries, &index, entry, limit, + GFP_KERNEL); + + if (ret) { + kfree(entry); + entry = NULL; + } else { + entry->index = index; + entry->dynamic = index >= num_static; + } + + return entry; +} + +/** + * ice_reduce_msix_usage - Reduce usage of MSI-X vectors + * @pf: board private structure + * @v_remain: number of remaining MSI-X vectors to be distributed + * + * Reduce the usage of MSI-X vectors when entire request cannot be fulfilled. + * pf->num_lan_msix and pf->num_rdma_msix values are set based on number of + * remaining vectors. + */ +static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain) +{ + int v_rdma; + + if (!ice_is_rdma_ena(pf)) { + pf->num_lan_msix = v_remain; + return; + } + + /* RDMA needs at least 1 interrupt in addition to AEQ MSIX */ + v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; + + if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) { + dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n"); + clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); + + pf->num_rdma_msix = 0; + pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; + } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || + (v_remain - v_rdma < v_rdma)) { + /* Support minimum RDMA and give remaining vectors to LAN MSIX + */ + pf->num_rdma_msix = ICE_MIN_RDMA_MSIX; + pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX; + } else { + /* Split remaining MSIX with RDMA after accounting for AEQ MSIX + */ + pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + + ICE_RDMA_NUM_AEQ_MSIX; + pf->num_lan_msix = v_remain - pf->num_rdma_msix; + } +} + +/** + * ice_ena_msix_range - Request a range of MSIX vectors from the OS + * @pf: board private structure + * + * Compute the number of MSIX vectors wanted and request from the OS. Adjust + * device usage if there are not enough vectors. Return the number of vectors + * reserved or negative on failure. + */ +static int ice_ena_msix_range(struct ice_pf *pf) +{ + int num_cpus, hw_num_msix, v_other, v_wanted, v_actual; + struct device *dev = ice_pf_to_dev(pf); + int err; + + hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors; + num_cpus = num_online_cpus(); + + /* LAN miscellaneous handler */ + v_other = ICE_MIN_LAN_OICR_MSIX; + + /* Flow Director */ + if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) + v_other += ICE_FDIR_MSIX; + + /* switchdev */ + v_other += ICE_ESWITCH_MSIX; + + v_wanted = v_other; + + /* LAN traffic */ + pf->num_lan_msix = num_cpus; + v_wanted += pf->num_lan_msix; + + /* RDMA auxiliary driver */ + if (ice_is_rdma_ena(pf)) { + pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; + v_wanted += pf->num_rdma_msix; + } + + if (v_wanted > hw_num_msix) { + int v_remain; + + dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n", + v_wanted, hw_num_msix); + + if (hw_num_msix < ICE_MIN_MSIX) { + err = -ERANGE; + goto exit_err; + } + + v_remain = hw_num_msix - v_other; + if (v_remain < ICE_MIN_LAN_TXRX_MSIX) { + v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX; + v_remain = ICE_MIN_LAN_TXRX_MSIX; + } + + ice_reduce_msix_usage(pf, v_remain); + v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other; + + dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n", + pf->num_lan_msix); + if (ice_is_rdma_ena(pf)) + dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n", + pf->num_rdma_msix); + } + + /* actually reserve the vectors */ + v_actual = pci_alloc_irq_vectors(pf->pdev, ICE_MIN_MSIX, v_wanted, + PCI_IRQ_MSIX); + if (v_actual < 0) { + dev_err(dev, "unable to reserve MSI-X vectors\n"); + err = v_actual; + goto exit_err; + } + + if (v_actual < v_wanted) { + dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n", + v_wanted, v_actual); + + if (v_actual < ICE_MIN_MSIX) { + /* error if we can't get minimum vectors */ + pci_free_irq_vectors(pf->pdev); + err = -ERANGE; + goto exit_err; + } else { + int v_remain = v_actual - v_other; + + if (v_remain < ICE_MIN_LAN_TXRX_MSIX) + v_remain = ICE_MIN_LAN_TXRX_MSIX; + + ice_reduce_msix_usage(pf, v_remain); + + dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", + pf->num_lan_msix); + + if (ice_is_rdma_ena(pf)) + dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n", + pf->num_rdma_msix); + } + } + + return v_actual; + +exit_err: + pf->num_rdma_msix = 0; + pf->num_lan_msix = 0; + return err; +} + +/** + * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme + * @pf: board private structure + */ +void ice_clear_interrupt_scheme(struct ice_pf *pf) +{ + pci_free_irq_vectors(pf->pdev); + ice_deinit_irq_tracker(pf); +} + +/** + * ice_init_interrupt_scheme - Determine proper interrupt scheme + * @pf: board private structure to initialize + */ +int ice_init_interrupt_scheme(struct ice_pf *pf) +{ + int total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; + int vectors, max_vectors; + + vectors = ice_ena_msix_range(pf); + + if (vectors < 0) + return -ENOMEM; + + if (pci_msix_can_alloc_dyn(pf->pdev)) + max_vectors = total_vectors; + else + max_vectors = vectors; + + ice_init_irq_tracker(pf, max_vectors, vectors); + + return 0; +} + +/** + * ice_alloc_irq - Allocate new interrupt vector + * @pf: board private structure + * @dyn_only: force dynamic allocation of the interrupt + * + * Allocate new interrupt vector for a given owner id. + * return struct msi_map with interrupt details and track + * allocated interrupt appropriately. + * + * This function reserves new irq entry from the irq_tracker. + * if according to the tracker information all interrupts that + * were allocated with ice_pci_alloc_irq_vectors are already used + * and dynamically allocated interrupts are supported then new + * interrupt will be allocated with pci_msix_alloc_irq_at. + * + * Some callers may only support dynamically allocated interrupts. + * This is indicated with dyn_only flag. + * + * On failure, return map with negative .index. The caller + * is expected to check returned map index. + * + */ +struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only) +{ + int sriov_base_vector = pf->sriov_base_vector; + struct msi_map map = { .index = -ENOENT }; + struct device *dev = ice_pf_to_dev(pf); + struct ice_irq_entry *entry; + + entry = ice_get_irq_res(pf, dyn_only); + if (!entry) + return map; + + /* fail if we're about to violate SRIOV vectors space */ + if (sriov_base_vector && entry->index >= sriov_base_vector) + goto exit_free_res; + + if (pci_msix_can_alloc_dyn(pf->pdev) && entry->dynamic) { + map = pci_msix_alloc_irq_at(pf->pdev, entry->index, NULL); + if (map.index < 0) + goto exit_free_res; + dev_dbg(dev, "allocated new irq at index %d\n", map.index); + } else { + map.index = entry->index; + map.virq = pci_irq_vector(pf->pdev, map.index); + } + + return map; + +exit_free_res: + dev_err(dev, "Could not allocate irq at idx %d\n", entry->index); + ice_free_irq_res(pf, entry->index); + return map; +} + +/** + * ice_free_irq - Free interrupt vector + * @pf: board private structure + * @map: map with interrupt details + * + * Remove allocated interrupt from the interrupt tracker. If interrupt was + * allocated dynamically, free respective interrupt vector. + */ +void ice_free_irq(struct ice_pf *pf, struct msi_map map) +{ + struct ice_irq_entry *entry; + + entry = xa_load(&pf->irq_tracker.entries, map.index); + + if (!entry) { + dev_err(ice_pf_to_dev(pf), "Failed to get MSIX interrupt entry at index %d", + map.index); + return; + } + + dev_dbg(ice_pf_to_dev(pf), "Free irq at index %d\n", map.index); + + if (entry->dynamic) + pci_msix_free_irq(pf->pdev, map); + + ice_free_irq_res(pf, map.index); +} + +/** + * ice_get_max_used_msix_vector - Get the max used interrupt vector + * @pf: board private structure + * + * Return index of maximum used interrupt vectors with respect to the + * beginning of the MSIX table. Take into account that some interrupts + * may have been dynamically allocated after MSIX was initially enabled. + */ +int ice_get_max_used_msix_vector(struct ice_pf *pf) +{ + unsigned long start, index, max_idx; + void *entry; + + /* Treat all preallocated interrupts as used */ + start = pf->irq_tracker.num_static; + max_idx = start - 1; + + xa_for_each_start(&pf->irq_tracker.entries, index, entry, start) { + if (index > max_idx) + max_idx = index; + } + + return max_idx; +} diff --git a/drivers/net/ethernet/intel/ice/ice_irq.h b/drivers/net/ethernet/intel/ice/ice_irq.h new file mode 100644 index 000000000000..f35efc08575e --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_irq.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2023, Intel Corporation. */ + +#ifndef _ICE_IRQ_H_ +#define _ICE_IRQ_H_ + +struct ice_irq_entry { + unsigned int index; + bool dynamic; /* allocation type flag */ +}; + +struct ice_irq_tracker { + struct xarray entries; + u16 num_entries; /* total vectors available */ + u16 num_static; /* preallocated entries */ +}; + +int ice_init_interrupt_scheme(struct ice_pf *pf); +void ice_clear_interrupt_scheme(struct ice_pf *pf); + +struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only); +void ice_free_irq(struct ice_pf *pf, struct msi_map map); +int ice_get_max_used_msix_vector(struct ice_pf *pf); + +#endif diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index ee5b36941ba3..5a7753bda324 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -7,15 +7,6 @@ #include "ice_lag.h" /** - * ice_lag_nop_handler - no-op Rx handler to disable LAG - * @pskb: pointer to skb pointer - */ -rx_handler_result_t ice_lag_nop_handler(struct sk_buff __always_unused **pskb) -{ - return RX_HANDLER_PASS; -} - -/** * ice_lag_set_primary - set PF LAG state as Primary * @lag: LAG info struct */ @@ -158,7 +149,6 @@ ice_lag_link(struct ice_lag *lag, struct netdev_notifier_changeupper_info *info) lag->upper_netdev = upper; } - ice_clear_sriov_cap(pf); ice_clear_rdma_cap(pf); lag->bonded = true; @@ -205,7 +195,6 @@ ice_lag_unlink(struct ice_lag *lag, } lag->peer_netdev = NULL; - ice_set_sriov_cap(pf); ice_set_rdma_cap(pf); lag->bonded = false; lag->role = ICE_LAG_NONE; @@ -229,7 +218,6 @@ static void ice_lag_unregister(struct ice_lag *lag, struct net_device *netdev) if (lag->upper_netdev) { dev_put(lag->upper_netdev); lag->upper_netdev = NULL; - ice_set_sriov_cap(pf); ice_set_rdma_cap(pf); } /* perform some cleanup in case we come back */ diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h index 51b5cf467ce2..2c373676c42f 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.h +++ b/drivers/net/ethernet/intel/ice/ice_lag.h @@ -25,63 +25,9 @@ struct ice_lag { struct notifier_block notif_block; u8 bonded:1; /* currently bonded */ u8 primary:1; /* this is primary */ - u8 handler:1; /* did we register a rx_netdev_handler */ - /* each thing blocking bonding will increment this value by one. - * If this value is zero, then bonding is allowed. - */ - u16 dis_lag; u8 role; }; int ice_init_lag(struct ice_pf *pf); void ice_deinit_lag(struct ice_pf *pf); -rx_handler_result_t ice_lag_nop_handler(struct sk_buff **pskb); - -/** - * ice_disable_lag - increment LAG disable count - * @lag: LAG struct - */ -static inline void ice_disable_lag(struct ice_lag *lag) -{ - /* If LAG this PF is not already disabled, disable it */ - rtnl_lock(); - if (!netdev_is_rx_handler_busy(lag->netdev)) { - if (!netdev_rx_handler_register(lag->netdev, - ice_lag_nop_handler, - NULL)) - lag->handler = true; - } - rtnl_unlock(); - lag->dis_lag++; -} - -/** - * ice_enable_lag - decrement disable count for a PF - * @lag: LAG struct - * - * Decrement the disable counter for a port, and if that count reaches - * zero, then remove the no-op Rx handler from that netdev - */ -static inline void ice_enable_lag(struct ice_lag *lag) -{ - if (lag->dis_lag) - lag->dis_lag--; - if (!lag->dis_lag && lag->handler) { - rtnl_lock(); - netdev_rx_handler_unregister(lag->netdev); - rtnl_unlock(); - lag->handler = false; - } -} - -/** - * ice_is_lag_dis - is LAG disabled - * @lag: LAG struct - * - * Return true if bonding is disabled - */ -static inline bool ice_is_lag_dis(struct ice_lag *lag) -{ - return !!(lag->dis_lag); -} #endif /* _ICE_LAG_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 11ae0e41f518..0054d7e64ec3 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -321,31 +321,19 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi) dev = ice_pf_to_dev(pf); - if (vsi->af_xdp_zc_qps) { - bitmap_free(vsi->af_xdp_zc_qps); - vsi->af_xdp_zc_qps = NULL; - } + bitmap_free(vsi->af_xdp_zc_qps); + vsi->af_xdp_zc_qps = NULL; /* free the ring and vector containers */ - if (vsi->q_vectors) { - devm_kfree(dev, vsi->q_vectors); - vsi->q_vectors = NULL; - } - if (vsi->tx_rings) { - devm_kfree(dev, vsi->tx_rings); - vsi->tx_rings = NULL; - } - if (vsi->rx_rings) { - devm_kfree(dev, vsi->rx_rings); - vsi->rx_rings = NULL; - } - if (vsi->txq_map) { - devm_kfree(dev, vsi->txq_map); - vsi->txq_map = NULL; - } - if (vsi->rxq_map) { - devm_kfree(dev, vsi->rxq_map); - vsi->rxq_map = NULL; - } + devm_kfree(dev, vsi->q_vectors); + vsi->q_vectors = NULL; + devm_kfree(dev, vsi->tx_rings); + vsi->tx_rings = NULL; + devm_kfree(dev, vsi->rx_rings); + vsi->rx_rings = NULL; + devm_kfree(dev, vsi->txq_map); + vsi->txq_map = NULL; + devm_kfree(dev, vsi->rxq_map); + vsi->rxq_map = NULL; } /** @@ -902,10 +890,8 @@ static void ice_rss_clean(struct ice_vsi *vsi) dev = ice_pf_to_dev(pf); - if (vsi->rss_hkey_user) - devm_kfree(dev, vsi->rss_hkey_user); - if (vsi->rss_lut_user) - devm_kfree(dev, vsi->rss_lut_user); + devm_kfree(dev, vsi->rss_hkey_user); + devm_kfree(dev, vsi->rss_lut_user); ice_vsi_clean_rss_flow_fld(vsi); /* remove RSS replay list */ @@ -1371,190 +1357,6 @@ out: } /** - * ice_free_res - free a block of resources - * @res: pointer to the resource - * @index: starting index previously returned by ice_get_res - * @id: identifier to track owner - * - * Returns number of resources freed - */ -int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id) -{ - int count = 0; - int i; - - if (!res || index >= res->end) - return -EINVAL; - - id |= ICE_RES_VALID_BIT; - for (i = index; i < res->end && res->list[i] == id; i++) { - res->list[i] = 0; - count++; - } - - return count; -} - -/** - * ice_search_res - Search the tracker for a block of resources - * @res: pointer to the resource - * @needed: size of the block needed - * @id: identifier to track owner - * - * Returns the base item index of the block, or -ENOMEM for error - */ -static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) -{ - u16 start = 0, end = 0; - - if (needed > res->end) - return -ENOMEM; - - id |= ICE_RES_VALID_BIT; - - do { - /* skip already allocated entries */ - if (res->list[end++] & ICE_RES_VALID_BIT) { - start = end; - if ((start + needed) > res->end) - break; - } - - if (end == (start + needed)) { - int i = start; - - /* there was enough, so assign it to the requestor */ - while (i != end) - res->list[i++] = id; - - return start; - } - } while (end < res->end); - - return -ENOMEM; -} - -/** - * ice_get_free_res_count - Get free count from a resource tracker - * @res: Resource tracker instance - */ -static u16 ice_get_free_res_count(struct ice_res_tracker *res) -{ - u16 i, count = 0; - - for (i = 0; i < res->end; i++) - if (!(res->list[i] & ICE_RES_VALID_BIT)) - count++; - - return count; -} - -/** - * ice_get_res - get a block of resources - * @pf: board private structure - * @res: pointer to the resource - * @needed: size of the block needed - * @id: identifier to track owner - * - * Returns the base item index of the block, or negative for error - */ -int -ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) -{ - if (!res || !pf) - return -EINVAL; - - if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) { - dev_err(ice_pf_to_dev(pf), "param err: needed=%d, num_entries = %d id=0x%04x\n", - needed, res->num_entries, id); - return -EINVAL; - } - - return ice_search_res(res, needed, id); -} - -/** - * ice_get_vf_ctrl_res - Get VF control VSI resource - * @pf: pointer to the PF structure - * @vsi: the VSI to allocate a resource for - * - * Look up whether another VF has already allocated the control VSI resource. - * If so, re-use this resource so that we share it among all VFs. - * - * Otherwise, allocate the resource and return it. - */ -static int ice_get_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi) -{ - struct ice_vf *vf; - unsigned int bkt; - int base; - - rcu_read_lock(); - ice_for_each_vf_rcu(pf, bkt, vf) { - if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) { - base = pf->vsi[vf->ctrl_vsi_idx]->base_vector; - rcu_read_unlock(); - return base; - } - } - rcu_read_unlock(); - - return ice_get_res(pf, pf->irq_tracker, vsi->num_q_vectors, - ICE_RES_VF_CTRL_VEC_ID); -} - -/** - * ice_vsi_setup_vector_base - Set up the base vector for the given VSI - * @vsi: ptr to the VSI - * - * This should only be called after ice_vsi_alloc_def() which allocates the - * corresponding SW VSI structure and initializes num_queue_pairs for the - * newly allocated VSI. - * - * Returns 0 on success or negative on failure - */ -static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - struct device *dev; - u16 num_q_vectors; - int base; - - dev = ice_pf_to_dev(pf); - /* SRIOV doesn't grab irq_tracker entries for each VSI */ - if (vsi->type == ICE_VSI_VF) - return 0; - if (vsi->type == ICE_VSI_CHNL) - return 0; - - if (vsi->base_vector) { - dev_dbg(dev, "VSI %d has non-zero base vector %d\n", - vsi->vsi_num, vsi->base_vector); - return -EEXIST; - } - - num_q_vectors = vsi->num_q_vectors; - /* reserve slots from OS requested IRQs */ - if (vsi->type == ICE_VSI_CTRL && vsi->vf) { - base = ice_get_vf_ctrl_res(pf, vsi); - } else { - base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, - vsi->idx); - } - - if (base < 0) { - dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n", - ice_get_free_res_count(pf->irq_tracker), - ice_vsi_type_str(vsi->type), vsi->idx, num_q_vectors); - return -ENOENT; - } - vsi->base_vector = (u16)base; - pf->num_avail_sw_msix -= num_q_vectors; - - return 0; -} - -/** * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI * @vsi: the VSI having rings deallocated */ @@ -2410,50 +2212,6 @@ static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi) } /** - * ice_vsi_set_q_vectors_reg_idx - set the HW register index for all q_vectors - * @vsi: VSI to set the q_vectors register index on - */ -static int -ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi) -{ - u16 i; - - if (!vsi || !vsi->q_vectors) - return -EINVAL; - - ice_for_each_q_vector(vsi, i) { - struct ice_q_vector *q_vector = vsi->q_vectors[i]; - - if (!q_vector) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to set reg_idx on q_vector %d VSI %d\n", - i, vsi->vsi_num); - goto clear_reg_idx; - } - - if (vsi->type == ICE_VSI_VF) { - struct ice_vf *vf = vsi->vf; - - q_vector->reg_idx = ice_calc_vf_reg_idx(vf, q_vector); - } else { - q_vector->reg_idx = - q_vector->v_idx + vsi->base_vector; - } - } - - return 0; - -clear_reg_idx: - ice_for_each_q_vector(vsi, i) { - struct ice_q_vector *q_vector = vsi->q_vectors[i]; - - if (q_vector) - q_vector->reg_idx = 0; - } - - return -EINVAL; -} - -/** * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling * @vsi: the VSI being configured * @tx: bool to determine Tx or Rx rule @@ -2611,37 +2369,6 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) vsi->agg_node->num_vsis); } -/** - * ice_free_vf_ctrl_res - Free the VF control VSI resource - * @pf: pointer to PF structure - * @vsi: the VSI to free resources for - * - * Check if the VF control VSI resource is still in use. If no VF is using it - * any more, release the VSI resource. Otherwise, leave it to be cleaned up - * once no other VF uses it. - */ -static void ice_free_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi) -{ - struct ice_vf *vf; - unsigned int bkt; - - rcu_read_lock(); - ice_for_each_vf_rcu(pf, bkt, vf) { - if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) { - rcu_read_unlock(); - return; - } - } - rcu_read_unlock(); - - /* No other VFs left that have control VSI. It is now safe to reclaim - * SW interrupts back to the common pool. - */ - ice_free_res(pf->irq_tracker, vsi->base_vector, - ICE_RES_VF_CTRL_VEC_ID); - pf->num_avail_sw_msix += vsi->num_q_vectors; -} - static int ice_vsi_cfg_tc_lan(struct ice_pf *pf, struct ice_vsi *vsi) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; @@ -2728,14 +2455,6 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) if (ret) goto unroll_vsi_init; - ret = ice_vsi_setup_vector_base(vsi); - if (ret) - goto unroll_alloc_q_vector; - - ret = ice_vsi_set_q_vectors_reg_idx(vsi); - if (ret) - goto unroll_vector_base; - ret = ice_vsi_alloc_rings(vsi); if (ret) goto unroll_vector_base; @@ -2788,10 +2507,6 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) if (ret) goto unroll_alloc_q_vector; - ret = ice_vsi_set_q_vectors_reg_idx(vsi); - if (ret) - goto unroll_vector_base; - ret = ice_vsi_alloc_ring_stats(vsi); if (ret) goto unroll_vector_base; @@ -2827,8 +2542,6 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) unroll_vector_base: /* reclaim SW interrupts back to the common pool */ - ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); - pf->num_avail_sw_msix += vsi->num_q_vectors; unroll_alloc_q_vector: ice_vsi_free_q_vectors(vsi); unroll_vsi_init: @@ -2920,14 +2633,6 @@ void ice_vsi_decfg(struct ice_vsi *vsi) * many interrupts each VF needs. SR-IOV MSIX resources are also * cleared in the same manner. */ - if (vsi->type == ICE_VSI_CTRL && vsi->vf) { - ice_free_vf_ctrl_res(pf, vsi); - } else if (vsi->type != ICE_VSI_VF) { - /* reclaim SW interrupts back to the common pool */ - ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); - pf->num_avail_sw_msix += vsi->num_q_vectors; - vsi->base_vector = 0; - } if (vsi->type == ICE_VSI_VF && vsi->agg_node && vsi->agg_node->valid) @@ -2993,8 +2698,6 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params) return vsi; err_vsi_cfg: - if (params->type == ICE_VSI_VF) - ice_enable_lag(pf->lag); ice_vsi_free(vsi); return NULL; @@ -3044,7 +2747,6 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) void ice_vsi_free_irq(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; - int base = vsi->base_vector; int i; if (!vsi->q_vectors || !vsi->irqs_ready) @@ -3058,10 +2760,9 @@ void ice_vsi_free_irq(struct ice_vsi *vsi) ice_free_cpu_rx_rmap(vsi); ice_for_each_q_vector(vsi, i) { - u16 vector = i + base; int irq_num; - irq_num = pf->msix_entries[vector].vector; + irq_num = vsi->q_vectors[i]->irq.virq; /* free only the irqs that were actually requested */ if (!vsi->q_vectors[i] || @@ -3193,7 +2894,6 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) */ void ice_vsi_dis_irq(struct ice_vsi *vsi) { - int base = vsi->base_vector; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; u32 val; @@ -3240,7 +2940,7 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi) return; ice_for_each_q_vector(vsi, i) - synchronize_irq(pf->msix_entries[i + base].vector); + synchronize_irq(vsi->q_vectors[i]->irq.virq); } /** @@ -3272,39 +2972,12 @@ int ice_vsi_release(struct ice_vsi *vsi) return -ENODEV; pf = vsi->back; - /* do not unregister while driver is in the reset recovery pending - * state. Since reset/rebuild happens through PF service task workqueue, - * it's not a good idea to unregister netdev that is associated to the - * PF that is running the work queue items currently. This is done to - * avoid check_flush_dependency() warning on this wq - */ - if (vsi->netdev && !ice_is_reset_in_progress(pf->state) && - (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state))) { - unregister_netdev(vsi->netdev); - clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); - } - - if (vsi->type == ICE_VSI_PF) - ice_devlink_destroy_pf_port(pf); - if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) ice_rss_clean(vsi); ice_vsi_close(vsi); ice_vsi_decfg(vsi); - if (vsi->netdev) { - if (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state)) { - unregister_netdev(vsi->netdev); - clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); - } - if (test_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state)) { - free_netdev(vsi->netdev); - vsi->netdev = NULL; - clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); - } - } - /* retain SW VSI data structure since it is needed to unregister and * free VSI netdev when PF is not in reset recovery pending state,\ * for ex: during rmmod. diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 75221478f2dc..e985766e6bb5 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -104,11 +104,6 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked); void ice_vsi_decfg(struct ice_vsi *vsi); void ice_dis_vsi(struct ice_vsi *vsi, bool locked); -int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id); - -int -ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id); - int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags); int ice_vsi_cfg(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 42c318ceff61..c2cdc79308dc 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1346,6 +1346,7 @@ int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout, static void ice_aq_check_events(struct ice_pf *pf, u16 opcode, struct ice_rq_event_info *event) { + struct ice_rq_event_info *task_ev; struct ice_aq_task *task; bool found = false; @@ -1354,15 +1355,15 @@ static void ice_aq_check_events(struct ice_pf *pf, u16 opcode, if (task->state || task->opcode != opcode) continue; - memcpy(&task->event->desc, &event->desc, sizeof(event->desc)); - task->event->msg_len = event->msg_len; + task_ev = task->event; + memcpy(&task_ev->desc, &event->desc, sizeof(event->desc)); + task_ev->msg_len = event->msg_len; /* Only copy the data buffer if a destination was set */ - if (task->event->msg_buf && - task->event->buf_len > event->buf_len) { - memcpy(task->event->msg_buf, event->msg_buf, + if (task_ev->msg_buf && task_ev->buf_len >= event->buf_len) { + memcpy(task_ev->msg_buf, event->msg_buf, event->buf_len); - task->event->buf_len = event->buf_len; + task_ev->buf_len = event->buf_len; } task->state = ICE_AQ_TASK_COMPLETE; @@ -2490,7 +2491,6 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) { int q_vectors = vsi->num_q_vectors; struct ice_pf *pf = vsi->back; - int base = vsi->base_vector; struct device *dev; int rx_int_idx = 0; int tx_int_idx = 0; @@ -2501,7 +2501,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) for (vector = 0; vector < q_vectors; vector++) { struct ice_q_vector *q_vector = vsi->q_vectors[vector]; - irq_num = pf->msix_entries[base + vector].vector; + irq_num = q_vector->irq.virq; if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, @@ -2555,9 +2555,8 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) return 0; free_q_irqs: - while (vector) { - vector--; - irq_num = pf->msix_entries[base + vector].vector; + while (vector--) { + irq_num = vsi->q_vectors[vector]->irq.virq; if (!IS_ENABLED(CONFIG_RFS_ACCEL)) irq_set_affinity_notifier(irq_num, NULL); irq_set_affinity_hint(irq_num, NULL); @@ -2635,11 +2634,11 @@ static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog) int i; old_prog = xchg(&vsi->xdp_prog, prog); - if (old_prog) - bpf_prog_put(old_prog); - ice_for_each_rxq(vsi, i) WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); + + if (old_prog) + bpf_prog_put(old_prog); } /** @@ -2924,6 +2923,12 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } } + /* hot swap progs and avoid toggling link */ + if (ice_is_xdp_ena_vsi(vsi) == !!prog) { + ice_vsi_assign_bpf_prog(vsi, prog); + return 0; + } + /* need to stop netdev while setting up the program for Rx rings */ if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { ret = ice_down(vsi); @@ -2956,13 +2961,6 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, xdp_ring_err = ice_realloc_zc_buf(vsi, false); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed"); - } else { - /* safe to call even when prog == vsi->xdp_prog as - * dev_xdp_install in net/core/dev.c incremented prog's - * refcount so corresponding bpf_prog_put won't cause - * underflow - */ - ice_vsi_assign_bpf_prog(vsi, prog); } if (if_running) @@ -3047,7 +3045,7 @@ static void ice_ena_misc_vector(struct ice_pf *pf) wr32(hw, PFINT_OICR_ENA, val); /* SW_ITR_IDX = 0, but don't change INTENA */ - wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), + wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index), GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M); } @@ -3060,7 +3058,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) { struct ice_pf *pf = (struct ice_pf *)data; struct ice_hw *hw = &pf->hw; - irqreturn_t ret = IRQ_NONE; struct device *dev; u32 oicr, ena_mask; @@ -3142,19 +3139,24 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_TSYN_TX_M) { ena_mask &= ~PFINT_OICR_TSYN_TX_M; if (!hw->reset_ongoing) - ret = IRQ_WAKE_THREAD; + set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread); } if (oicr & PFINT_OICR_TSYN_EVNT_M) { u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; u32 gltsyn_stat = rd32(hw, GLTSYN_STAT(tmr_idx)); - /* Save EVENTs from GTSYN register */ - pf->ptp.ext_ts_irq |= gltsyn_stat & (GLTSYN_STAT_EVENT0_M | - GLTSYN_STAT_EVENT1_M | - GLTSYN_STAT_EVENT2_M); ena_mask &= ~PFINT_OICR_TSYN_EVNT_M; - kthread_queue_work(pf->ptp.kworker, &pf->ptp.extts_work); + + if (hw->func_caps.ts_func_info.src_tmr_owned) { + /* Save EVENTs from GLTSYN register */ + pf->ptp.ext_ts_irq |= gltsyn_stat & + (GLTSYN_STAT_EVENT0_M | + GLTSYN_STAT_EVENT1_M | + GLTSYN_STAT_EVENT2_M); + + set_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread); + } } #define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M) @@ -3174,16 +3176,10 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & (PFINT_OICR_PCI_EXCEPTION_M | PFINT_OICR_ECC_ERR_M)) { set_bit(ICE_PFR_REQ, pf->state); - ice_service_task_schedule(pf); } } - if (!ret) - ret = IRQ_HANDLED; - ice_service_task_schedule(pf); - ice_irq_dynamic_ena(hw, NULL, NULL); - - return ret; + return IRQ_WAKE_THREAD; } /** @@ -3194,12 +3190,29 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) { struct ice_pf *pf = data; + struct ice_hw *hw; + + hw = &pf->hw; if (ice_is_reset_in_progress(pf->state)) return IRQ_HANDLED; - while (!ice_ptp_process_ts(pf)) - usleep_range(50, 100); + ice_service_task_schedule(pf); + + if (test_and_clear_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread)) + ice_ptp_extts_event(pf); + + if (test_and_clear_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread)) { + /* Process outstanding Tx timestamps. If there is more work, + * re-arm the interrupt to trigger again. + */ + if (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) { + wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M); + ice_flush(hw); + } + } + + ice_irq_dynamic_ena(hw, NULL, NULL); return IRQ_HANDLED; } @@ -3234,6 +3247,7 @@ static void ice_dis_ctrlq_interrupts(struct ice_hw *hw) */ static void ice_free_irq_msix_misc(struct ice_pf *pf) { + int misc_irq_num = pf->oicr_irq.virq; struct ice_hw *hw = &pf->hw; ice_dis_ctrlq_interrupts(hw); @@ -3242,14 +3256,10 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf) wr32(hw, PFINT_OICR_ENA, 0); ice_flush(hw); - if (pf->msix_entries) { - synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); - devm_free_irq(ice_pf_to_dev(pf), - pf->msix_entries[pf->oicr_idx].vector, pf); - } + synchronize_irq(misc_irq_num); + devm_free_irq(ice_pf_to_dev(pf), misc_irq_num, pf); - pf->num_avail_sw_msix += 1; - ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID); + ice_free_irq(pf, pf->oicr_irq); } /** @@ -3295,7 +3305,8 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - int oicr_idx, err = 0; + struct msi_map oicr_irq; + int err = 0; if (!pf->int_name[0]) snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc", @@ -3309,30 +3320,26 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) goto skip_req_irq; /* reserve one vector in irq_tracker for misc interrupts */ - oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); - if (oicr_idx < 0) - return oicr_idx; - - pf->num_avail_sw_msix -= 1; - pf->oicr_idx = (u16)oicr_idx; - - err = devm_request_threaded_irq(dev, - pf->msix_entries[pf->oicr_idx].vector, - ice_misc_intr, ice_misc_intr_thread_fn, - 0, pf->int_name, pf); + oicr_irq = ice_alloc_irq(pf, false); + if (oicr_irq.index < 0) + return oicr_irq.index; + + pf->oicr_irq = oicr_irq; + err = devm_request_threaded_irq(dev, pf->oicr_irq.virq, ice_misc_intr, + ice_misc_intr_thread_fn, 0, + pf->int_name, pf); if (err) { dev_err(dev, "devm_request_threaded_irq for %s failed: %d\n", pf->int_name, err); - ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); - pf->num_avail_sw_msix += 1; + ice_free_irq(pf, pf->oicr_irq); return err; } skip_req_irq: ice_ena_misc_vector(pf); - ice_ena_ctrlq_interrupts(hw, pf->oicr_idx); - wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx), + ice_ena_ctrlq_interrupts(hw, pf->oicr_irq.index); + wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_irq.index), ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S); ice_flush(hw); @@ -3901,224 +3908,6 @@ static int ice_init_pf(struct ice_pf *pf) } /** - * ice_reduce_msix_usage - Reduce usage of MSI-X vectors - * @pf: board private structure - * @v_remain: number of remaining MSI-X vectors to be distributed - * - * Reduce the usage of MSI-X vectors when entire request cannot be fulfilled. - * pf->num_lan_msix and pf->num_rdma_msix values are set based on number of - * remaining vectors. - */ -static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain) -{ - int v_rdma; - - if (!ice_is_rdma_ena(pf)) { - pf->num_lan_msix = v_remain; - return; - } - - /* RDMA needs at least 1 interrupt in addition to AEQ MSIX */ - v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; - - if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) { - dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n"); - clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); - - pf->num_rdma_msix = 0; - pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; - } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || - (v_remain - v_rdma < v_rdma)) { - /* Support minimum RDMA and give remaining vectors to LAN MSIX */ - pf->num_rdma_msix = ICE_MIN_RDMA_MSIX; - pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX; - } else { - /* Split remaining MSIX with RDMA after accounting for AEQ MSIX - */ - pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + - ICE_RDMA_NUM_AEQ_MSIX; - pf->num_lan_msix = v_remain - pf->num_rdma_msix; - } -} - -/** - * ice_ena_msix_range - Request a range of MSIX vectors from the OS - * @pf: board private structure - * - * Compute the number of MSIX vectors wanted and request from the OS. Adjust - * device usage if there are not enough vectors. Return the number of vectors - * reserved or negative on failure. - */ -static int ice_ena_msix_range(struct ice_pf *pf) -{ - int num_cpus, hw_num_msix, v_other, v_wanted, v_actual; - struct device *dev = ice_pf_to_dev(pf); - int err, i; - - hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors; - num_cpus = num_online_cpus(); - - /* LAN miscellaneous handler */ - v_other = ICE_MIN_LAN_OICR_MSIX; - - /* Flow Director */ - if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) - v_other += ICE_FDIR_MSIX; - - /* switchdev */ - v_other += ICE_ESWITCH_MSIX; - - v_wanted = v_other; - - /* LAN traffic */ - pf->num_lan_msix = num_cpus; - v_wanted += pf->num_lan_msix; - - /* RDMA auxiliary driver */ - if (ice_is_rdma_ena(pf)) { - pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; - v_wanted += pf->num_rdma_msix; - } - - if (v_wanted > hw_num_msix) { - int v_remain; - - dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n", - v_wanted, hw_num_msix); - - if (hw_num_msix < ICE_MIN_MSIX) { - err = -ERANGE; - goto exit_err; - } - - v_remain = hw_num_msix - v_other; - if (v_remain < ICE_MIN_LAN_TXRX_MSIX) { - v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX; - v_remain = ICE_MIN_LAN_TXRX_MSIX; - } - - ice_reduce_msix_usage(pf, v_remain); - v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other; - - dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n", - pf->num_lan_msix); - if (ice_is_rdma_ena(pf)) - dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n", - pf->num_rdma_msix); - } - - pf->msix_entries = devm_kcalloc(dev, v_wanted, - sizeof(*pf->msix_entries), GFP_KERNEL); - if (!pf->msix_entries) { - err = -ENOMEM; - goto exit_err; - } - - for (i = 0; i < v_wanted; i++) - pf->msix_entries[i].entry = i; - - /* actually reserve the vectors */ - v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries, - ICE_MIN_MSIX, v_wanted); - if (v_actual < 0) { - dev_err(dev, "unable to reserve MSI-X vectors\n"); - err = v_actual; - goto msix_err; - } - - if (v_actual < v_wanted) { - dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n", - v_wanted, v_actual); - - if (v_actual < ICE_MIN_MSIX) { - /* error if we can't get minimum vectors */ - pci_disable_msix(pf->pdev); - err = -ERANGE; - goto msix_err; - } else { - int v_remain = v_actual - v_other; - - if (v_remain < ICE_MIN_LAN_TXRX_MSIX) - v_remain = ICE_MIN_LAN_TXRX_MSIX; - - ice_reduce_msix_usage(pf, v_remain); - - dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", - pf->num_lan_msix); - - if (ice_is_rdma_ena(pf)) - dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n", - pf->num_rdma_msix); - } - } - - return v_actual; - -msix_err: - devm_kfree(dev, pf->msix_entries); - -exit_err: - pf->num_rdma_msix = 0; - pf->num_lan_msix = 0; - return err; -} - -/** - * ice_dis_msix - Disable MSI-X interrupt setup in OS - * @pf: board private structure - */ -static void ice_dis_msix(struct ice_pf *pf) -{ - pci_disable_msix(pf->pdev); - devm_kfree(ice_pf_to_dev(pf), pf->msix_entries); - pf->msix_entries = NULL; -} - -/** - * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme - * @pf: board private structure - */ -static void ice_clear_interrupt_scheme(struct ice_pf *pf) -{ - ice_dis_msix(pf); - - if (pf->irq_tracker) { - devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker); - pf->irq_tracker = NULL; - } -} - -/** - * ice_init_interrupt_scheme - Determine proper interrupt scheme - * @pf: board private structure to initialize - */ -static int ice_init_interrupt_scheme(struct ice_pf *pf) -{ - int vectors; - - vectors = ice_ena_msix_range(pf); - - if (vectors < 0) - return vectors; - - /* set up vector assignment tracking */ - pf->irq_tracker = devm_kzalloc(ice_pf_to_dev(pf), - struct_size(pf->irq_tracker, list, vectors), - GFP_KERNEL); - if (!pf->irq_tracker) { - ice_dis_msix(pf); - return -ENOMEM; - } - - /* populate SW interrupts pool with number of OS granted IRQs. */ - pf->num_avail_sw_msix = (u16)vectors; - pf->irq_tracker->num_entries = (u16)vectors; - pf->irq_tracker->end = pf->irq_tracker->num_entries; - - return 0; -} - -/** * ice_is_wol_supported - check if WoL is supported * @hw: pointer to hardware info * @@ -4642,9 +4431,9 @@ static int ice_start_eth(struct ice_vsi *vsi) if (err) return err; - rtnl_lock(); err = ice_vsi_open(vsi); - rtnl_unlock(); + if (err) + ice_fltr_remove_all(vsi); return err; } @@ -5107,6 +4896,7 @@ int ice_load(struct ice_pf *pf) params = ice_vsi_to_params(vsi); params.flags = ICE_VSI_FLAG_INIT; + rtnl_lock(); err = ice_vsi_cfg(vsi, ¶ms); if (err) goto err_vsi_cfg; @@ -5114,6 +4904,7 @@ int ice_load(struct ice_pf *pf) err = ice_start_eth(ice_get_main_vsi(pf)); if (err) goto err_start_eth; + rtnl_unlock(); err = ice_init_rdma(pf); if (err) @@ -5128,9 +4919,11 @@ int ice_load(struct ice_pf *pf) err_init_rdma: ice_vsi_close(ice_get_main_vsi(pf)); + rtnl_lock(); err_start_eth: ice_vsi_decfg(ice_get_main_vsi(pf)); err_vsi_cfg: + rtnl_unlock(); ice_deinit_dev(pf); return err; } @@ -5143,8 +4936,10 @@ void ice_unload(struct ice_pf *pf) { ice_deinit_features(pf); ice_deinit_rdma(pf); + rtnl_lock(); ice_stop_eth(ice_get_main_vsi(pf)); ice_vsi_decfg(ice_get_main_vsi(pf)); + rtnl_unlock(); ice_deinit_dev(pf); } @@ -5835,11 +5630,6 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) if (!is_valid_ether_addr(mac)) return -EADDRNOTAVAIL; - if (ether_addr_equal(netdev->dev_addr, mac)) { - netdev_dbg(netdev, "already using mac %pM\n", mac); - return 0; - } - if (test_bit(ICE_DOWN, pf->state) || ice_is_reset_in_progress(pf->state)) { netdev_err(netdev, "can't set mac %pM. device not ready\n", @@ -5956,6 +5746,13 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate) q_handle = vsi->tx_rings[queue_index]->q_handle; tc = ice_dcb_get_tc(vsi, queue_index); + vsi = ice_locate_vsi_using_queue(vsi, queue_index); + if (!vsi) { + netdev_err(netdev, "Invalid VSI for given queue %d\n", + queue_index); + return -EINVAL; + } + /* Set BW back to default, when user set maxrate to 0 */ if (!maxrate) status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc, @@ -7629,21 +7426,9 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) } netdev->mtu = (unsigned int)new_mtu; - - /* if VSI is up, bring it down and then back up */ - if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { - err = ice_down(vsi); - if (err) { - netdev_err(netdev, "change MTU if_down err %d\n", err); - return err; - } - - err = ice_up(vsi); - if (err) { - netdev_err(netdev, "change MTU if_up err %d\n", err); - return err; - } - } + err = ice_down_up(vsi); + if (err) + return err; netdev_dbg(netdev, "changed MTU to %d\n", new_mtu); set_bit(ICE_FLAG_MTU_CHANGED, pf->flags); @@ -8101,10 +7886,10 @@ static int ice_validate_mqprio_qopt(struct ice_vsi *vsi, struct tc_mqprio_qopt_offload *mqprio_qopt) { - u64 sum_max_rate = 0, sum_min_rate = 0; int non_power_of_2_qcount = 0; struct ice_pf *pf = vsi->back; int max_rss_q_cnt = 0; + u64 sum_min_rate = 0; struct device *dev; int i, speed; u8 num_tc; @@ -8120,6 +7905,7 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, dev = ice_pf_to_dev(pf); vsi->ch_rss_size = 0; num_tc = mqprio_qopt->qopt.num_tc; + speed = ice_get_link_speed_kbps(vsi); for (i = 0; num_tc; i++) { int qcount = mqprio_qopt->qopt.count[i]; @@ -8160,7 +7946,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, */ max_rate = mqprio_qopt->max_rate[i]; max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR); - sum_max_rate += max_rate; /* min_rate is minimum guaranteed rate and it can't be zero */ min_rate = mqprio_qopt->min_rate[i]; @@ -8173,6 +7958,12 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, return -EINVAL; } + if (max_rate && max_rate > speed) { + dev_err(dev, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n", + i, max_rate, speed); + return -EINVAL; + } + iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem); if (rem) { dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps", @@ -8210,12 +8001,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) return -EINVAL; - speed = ice_get_link_speed_kbps(vsi); - if (sum_max_rate && sum_max_rate > (u64)speed) { - dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n", - sum_max_rate, speed); - return -EINVAL; - } if (sum_min_rate && sum_min_rate > (u64)speed) { dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n", sum_min_rate, speed); @@ -9029,6 +8814,7 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_pf *pf = np->vsi->back; + bool locked = false; int err; switch (type) { @@ -9038,10 +8824,32 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, ice_setup_tc_block_cb, np, np, true); case TC_SETUP_QDISC_MQPRIO: + if (ice_is_eswitch_mode_switchdev(pf)) { + netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n"); + return -EOPNOTSUPP; + } + + if (pf->adev) { + mutex_lock(&pf->adev_mutex); + device_lock(&pf->adev->dev); + locked = true; + if (pf->adev->dev.driver) { + netdev_err(netdev, "Cannot change qdisc when RDMA is active\n"); + err = -EBUSY; + goto adev_unlock; + } + } + /* setup traffic classifier for receive side */ mutex_lock(&pf->tc_mutex); err = ice_setup_tc_mqprio_qdisc(netdev, type_data); mutex_unlock(&pf->tc_mutex); + +adev_unlock: + if (locked) { + device_unlock(&pf->adev->dev); + mutex_unlock(&pf->adev_mutex); + } return err; default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index 02a4e1cf624e..6a9364761165 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -47,6 +47,7 @@ enum ice_protocol_type { ICE_L2TPV3, ICE_VLAN_EX, ICE_VLAN_IN, + ICE_HW_METADATA, ICE_VXLAN_GPE, ICE_SCTP_IL, ICE_PROTOCOL_LAST @@ -115,17 +116,7 @@ enum ice_prot_id { #define ICE_L2TPV3_HW 104 #define ICE_UDP_OF_HW 52 /* UDP Tunnels */ -#define ICE_META_DATA_ID_HW 255 /* this is used for tunnel and VLAN type */ -#define ICE_MDID_SIZE 2 - -#define ICE_TUN_FLAG_MDID 21 -#define ICE_TUN_FLAG_MDID_OFF (ICE_MDID_SIZE * ICE_TUN_FLAG_MDID) -#define ICE_TUN_FLAG_MASK 0xFF - -#define ICE_VLAN_FLAG_MDID 20 -#define ICE_VLAN_FLAG_MDID_OFF (ICE_MDID_SIZE * ICE_VLAN_FLAG_MDID) -#define ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK 0xD000 #define ICE_TUN_FLAG_FV_IND 2 @@ -230,6 +221,191 @@ struct ice_nvgre_hdr { __be32 tni_flow; }; +/* Metadata information + * + * Not all MDIDs can be used by switch block. It depends on package version. + * + * MDID 16 (Rx offset) + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | A | B | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * A = Source port where the transaction came from (3b). + * + * B = Destination TC of the packet. The TC is relative to a port (5b). + * + * MDID 17 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | PTYPE | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * PTYPE = Encodes the packet type (10b). + * + * MDID 18 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Packet length | R | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Packet length = Length of the packet in bytes + * (packet always carriers CRC) (14b). + * R = Reserved (2b). + * + * MDID 19 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Source VSI | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Source VSI = Source VSI of packet loopbacked in switch (for egress) (10b). + */ +#define ICE_MDID_SOURCE_VSI_MASK GENMASK(9, 0) + +/* + * MDID 20 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |A|B|C|D|E|F|R|R|G|H|I|J|K|L|M|N| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * A = DSI - set for DSI RX pkts. + * B = ipsec_decrypted - invalid on NIC. + * C = marker - this is a marker packet. + * D = from_network - for TX sets to 0 + * for RX: + * * 1 - packet is from external link + * * 0 - packet source is from internal + * E = source_interface_is_rx - reflect the physical interface from where the + * packet was received: + * * 1 - Rx + * * 0 - Tx + * F = from_mng - The bit signals that the packet's origin is the management. + * G = ucast - Outer L2 MAC address is unicast. + * H = mcast - Outer L2 MAC address is multicast. + * I = bcast - Outer L2 MAC address is broadcast. + * J = second_outer_mac_present - 2 outer MAC headers are present in the packet. + * K = STAG or BVLAN - Outer L2 header has STAG (ethernet type 0x88a8) or + * BVLAN (ethernet type 0x88a8). + * L = ITAG - Outer L2 header has ITAG *ethernet type 0x88e7) + * M = EVLAN (0x8100) - Outer L2 header has EVLAN (ethernet type 0x8100) + * N = EVLAN (0x9100) - Outer L2 header has EVLAN (ethernet type 0x9100) + */ +#define ICE_PKT_VLAN_STAG BIT(12) +#define ICE_PKT_VLAN_ITAG BIT(13) +#define ICE_PKT_VLAN_EVLAN (BIT(14) | BIT(15)) +#define ICE_PKT_VLAN_MASK (ICE_PKT_VLAN_STAG | ICE_PKT_VLAN_ITAG | \ + ICE_PKT_VLAN_EVLAN) +/* MDID 21 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |A|B|C|D|E|F|G|H|I|J|R|R|K|L|M|N| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * A = VLAN (0x8100) - Outer L2 header has VLAN (ethernet type 0x8100) + * B = NSHoE - Outer L2 header has NSH (ethernet type 0x894f) + * C = MPLS (0x8847) - There is at least 1 MPLS tag in the outer header + * (ethernet type 0x8847) + * D = MPLS (0x8848) - There is at least 1 MPLS tag in the outer header + * (ethernet type 0x8848) + * E = multi MPLS - There is more than a single MPLS tag in the outer header + * F = inner MPLS - There is inner MPLS tag in the packet + * G = tunneled MAC - Set if the packet includes a tunneled MAC + * H = tunneled VLAN - Same as VLAN, but for a tunneled header + * I = pkt_is_frag - Packet is fragmented (ipv4 or ipv6) + * J = ipv6_ext - The packet has routing or destination ipv6 extension in inner + * or outer ipv6 headers + * K = RoCE - UDP packet detected as RoCEv2 + * L = UDP_XSUM_0 - Set to 1 if L4 checksum is 0 in a UDP packet + * M = ESP - This is a ESP packet + * N = NAT_ESP - This is a ESP packet encapsulated in UDP NAT + */ +#define ICE_PKT_TUNNEL_MAC BIT(6) +#define ICE_PKT_TUNNEL_VLAN BIT(7) +#define ICE_PKT_TUNNEL_MASK (ICE_PKT_TUNNEL_MAC | ICE_PKT_TUNNEL_VLAN) + +/* MDID 22 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |A|B|C|D|E|F| G |H|I|J| K |L|M| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * A = fin - fin flag in tcp header + * B = sync - sync flag in tcp header + * C = rst - rst flag in tcp header + * D = psh - psh flag in tcp header + * E = ack - ack flag in tcp header + * F = urg - urg flag in tcp header + * G = tunnel type (3b) - Flags used to decode tunnel type: + * * b000 - not a VXLAN/Geneve/GRE tunnel + * * b001 - VXLAN-GPE + * * b010 - VXLAN (non-GPE) + * * b011 - Geneve + * * b100 - GRE (no key, no xsum) + * * b101 - GREK (key, no xsum) + * * b110 - GREC (no key, xsum) + * * b111 - GREKC (key, xsum) + * H = UDP_GRE - Packet is UDP (VXLAN or VLAN_GPE or Geneve or MPLSoUDP or GRE) + * tunnel + * I = OAM - VXLAN/Geneve/tunneled NSH packet with the OAM bit set + * J = tunneled NSH - Packet has NSHoGRE or NSHoUDP + * K = switch (2b) - Direction on switch + * * b00 - normal + * * b01 - TX force only LAN + * * b10 - TX disable LAN + * * b11 - direct to VSI + * L = swpe - Represents SWPE bit in TX command + * M = sw_cmd - Switch command + * + * MDID 23 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |A|B|C|D| R |E|F|R| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * A = MAC error - Produced by MAC according to L2 error conditions + * B = PPRS no offload - FIFO overflow in PPRS or any problematic condition in + * PPRS ANA + * C = abort - Set when malicious packet is detected + * D = partial analysis - ANA's analysing got cut in the middle + * (header > 504B etc.) + * E = FLM - Flow director hit indication + * F = FDLONG - Flow direector long bucket indication + * + */ +#define ICE_MDID_SIZE 2 +#define ICE_META_DATA_ID_HW 255 + +enum ice_hw_metadata_id { + ICE_SOURCE_PORT_MDID = 16, + ICE_PTYPE_MDID = 17, + ICE_PACKET_LENGTH_MDID = 18, + ICE_SOURCE_VSI_MDID = 19, + ICE_PKT_VLAN_MDID = 20, + ICE_PKT_TUNNEL_MDID = 21, + ICE_PKT_TCP_MDID = 22, + ICE_PKT_ERROR_MDID = 23, +}; + +enum ice_hw_metadata_offset { + ICE_SOURCE_PORT_MDID_OFFSET = ICE_MDID_SIZE * ICE_SOURCE_PORT_MDID, + ICE_PTYPE_MDID_OFFSET = ICE_MDID_SIZE * ICE_PTYPE_MDID, + ICE_PACKET_LENGTH_MDID_OFFSET = ICE_MDID_SIZE * ICE_PACKET_LENGTH_MDID, + ICE_SOURCE_VSI_MDID_OFFSET = ICE_MDID_SIZE * ICE_SOURCE_VSI_MDID, + ICE_PKT_VLAN_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_VLAN_MDID, + ICE_PKT_TUNNEL_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_TUNNEL_MDID, + ICE_PKT_TCP_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_TCP_MDID, + ICE_PKT_ERROR_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_ERROR_MDID, +}; + +enum ice_pkt_flags { + ICE_PKT_FLAGS_VLAN = 0, + ICE_PKT_FLAGS_TUNNEL = 1, + ICE_PKT_FLAGS_TCP = 2, + ICE_PKT_FLAGS_ERROR = 3, +}; + +struct ice_hw_metadata { + __be16 source_port; + __be16 ptype; + __be16 packet_length; + __be16 source_vsi; + __be16 flags[4]; +}; + union ice_prot_hdr { struct ice_ether_hdr eth_hdr; struct ice_ethtype_hdr ethertype; @@ -243,6 +419,7 @@ union ice_prot_hdr { struct ice_udp_gtp_hdr gtp_hdr; struct ice_pppoe_hdr pppoe_hdr; struct ice_l2tpv3_sess_hdr l2tpv3_sess_hdr; + struct ice_hw_metadata metadata; }; /* This is mapping table entry that maps every word within a given protocol diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index ac6f06f9a2ed..81d96a40d5a7 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -617,7 +617,7 @@ ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx) } /** - * ice_ptp_tx_tstamp - Process Tx timestamps for a port + * ice_ptp_process_tx_tstamp - Process Tx timestamps for a port * @tx: the PTP Tx timestamp tracker * * Process timestamps captured by the PHY associated with this port. To do @@ -633,15 +633,6 @@ ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx) * 6) extend the 40 bit timestamp value to get a 64 bit timestamp value * 7) send this 64 bit timestamp to the stack * - * Returns true if all timestamps were handled, and false if any slots remain - * without a timestamp. - * - * After looping, if we still have waiting SKBs, return false. This may cause - * us effectively poll even when not strictly necessary. We do this because - * it's possible a new timestamp was requested around the same time as the - * interrupt. In some cases hardware might not interrupt us again when the - * timestamp is captured. - * * Note that we do not hold the tracking lock while reading the Tx timestamp. * This is because reading the timestamp requires taking a mutex that might * sleep. @@ -673,10 +664,9 @@ ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx) * the packet will never be sent by hardware and discard it without reading * the timestamp register. */ -static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) +static void ice_ptp_process_tx_tstamp(struct ice_ptp_tx *tx) { struct ice_ptp_port *ptp_port; - bool more_timestamps; struct ice_pf *pf; struct ice_hw *hw; u64 tstamp_ready; @@ -685,7 +675,7 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) u8 idx; if (!tx->init) - return true; + return; ptp_port = container_of(tx, struct ice_ptp_port, tx); pf = ptp_port_to_pf(ptp_port); @@ -694,7 +684,7 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) /* Read the Tx ready status first */ err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready); if (err) - return false; + return; /* Drop packets if the link went down */ link_up = ptp_port->link_up; @@ -782,15 +772,34 @@ skip_ts_read: skb_tstamp_tx(skb, &shhwtstamps); dev_kfree_skb_any(skb); } +} - /* Check if we still have work to do. If so, re-queue this task to - * poll for remaining timestamps. - */ +/** + * ice_ptp_tx_tstamp - Process Tx timestamps for this function. + * @tx: Tx tracking structure to initialize + * + * Returns: ICE_TX_TSTAMP_WORK_PENDING if there are any outstanding incomplete + * Tx timestamps, or ICE_TX_TSTAMP_WORK_DONE otherwise. + */ +static enum ice_tx_tstamp_work ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) +{ + bool more_timestamps; + + if (!tx->init) + return ICE_TX_TSTAMP_WORK_DONE; + + /* Process the Tx timestamp tracker */ + ice_ptp_process_tx_tstamp(tx); + + /* Check if there are outstanding Tx timestamps */ spin_lock(&tx->lock); more_timestamps = tx->init && !bitmap_empty(tx->in_use, tx->len); spin_unlock(&tx->lock); - return !more_timestamps; + if (more_timestamps) + return ICE_TX_TSTAMP_WORK_PENDING; + + return ICE_TX_TSTAMP_WORK_DONE; } /** @@ -911,7 +920,7 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) spin_unlock(&tx->lock); /* wait for potentially outstanding interrupt to complete */ - synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); + synchronize_irq(pf->oicr_irq.virq); ice_ptp_flush_tx_tracker(pf, tx); @@ -1458,15 +1467,11 @@ static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm) } /** - * ice_ptp_extts_work - Workqueue task function - * @work: external timestamp work structure - * - * Service for PTP external clock event + * ice_ptp_extts_event - Process PTP external clock event + * @pf: Board private structure */ -static void ice_ptp_extts_work(struct kthread_work *work) +void ice_ptp_extts_event(struct ice_pf *pf) { - struct ice_ptp *ptp = container_of(work, struct ice_ptp, extts_work); - struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp); struct ptp_clock_event event; struct ice_hw *hw = &pf->hw; u8 chan, tmr_idx; @@ -2430,9 +2435,10 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) * ice_ptp_process_ts - Process the PTP Tx timestamps * @pf: Board private structure * - * Returns true if timestamps are processed. + * Returns: ICE_TX_TSTAMP_WORK_PENDING if there are any outstanding Tx + * timestamps that need processing, and ICE_TX_TSTAMP_WORK_DONE otherwise. */ -bool ice_ptp_process_ts(struct ice_pf *pf) +enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf) { return ice_ptp_tx_tstamp(&pf->ptp.port.tx); } @@ -2558,7 +2564,6 @@ void ice_ptp_prepare_for_reset(struct ice_pf *pf) ice_ptp_cfg_timestamp(pf, false); kthread_cancel_delayed_work_sync(&ptp->work); - kthread_cancel_work_sync(&ptp->extts_work); if (test_bit(ICE_PFR_REQ, pf->state)) return; @@ -2656,7 +2661,6 @@ static int ice_ptp_init_work(struct ice_pf *pf, struct ice_ptp *ptp) /* Initialize work functions */ kthread_init_delayed_work(&ptp->work, ice_ptp_periodic_work); - kthread_init_work(&ptp->extts_work, ice_ptp_extts_work); /* Allocate a kworker for handling work required for the ports * connected to the PTP hardware clock. diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 9cda2f43e0e5..995a57019ba7 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -109,6 +109,16 @@ struct ice_tx_tstamp { }; /** + * enum ice_tx_tstamp_work - Status of Tx timestamp work function + * @ICE_TX_TSTAMP_WORK_DONE: Tx timestamp processing is complete + * @ICE_TX_TSTAMP_WORK_PENDING: More Tx timestamps are pending + */ +enum ice_tx_tstamp_work { + ICE_TX_TSTAMP_WORK_DONE = 0, + ICE_TX_TSTAMP_WORK_PENDING, +}; + +/** * struct ice_ptp_tx - Tracking structure for all Tx timestamp requests on a port * @lock: lock to prevent concurrent access to fields of this struct * @tstamps: array of len to store outstanding requests @@ -169,7 +179,6 @@ struct ice_ptp_port { * struct ice_ptp - data used for integrating with CONFIG_PTP_1588_CLOCK * @port: data for the PHY port initialization procedure * @work: delayed work function for periodic tasks - * @extts_work: work function for handling external Tx timestamps * @cached_phc_time: a cached copy of the PHC time for timestamp extension * @cached_phc_jiffies: jiffies when cached_phc_time was last updated * @ext_ts_chan: the external timestamp channel in use @@ -190,7 +199,6 @@ struct ice_ptp_port { struct ice_ptp { struct ice_ptp_port port; struct kthread_delayed_work work; - struct kthread_work extts_work; u64 cached_phc_time; unsigned long cached_phc_jiffies; u8 ext_ts_chan; @@ -256,8 +264,9 @@ int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr); void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena); int ice_get_ptp_clock_index(struct ice_pf *pf); +void ice_ptp_extts_event(struct ice_pf *pf); s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); -bool ice_ptp_process_ts(struct ice_pf *pf); +enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf); void ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, @@ -284,6 +293,7 @@ static inline int ice_get_ptp_clock_index(struct ice_pf *pf) return -1; } +static inline void ice_ptp_extts_event(struct ice_pf *pf) { } static inline s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) { diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index a38614d21ea8..de1d83300481 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -131,6 +131,8 @@ static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) case READ_TIME: cmd_val |= GLTSYN_CMD_READ_TIME; break; + case ICE_PTP_NOP: + break; } wr32(hw, GLTSYN_CMD, cmd_val); @@ -1226,18 +1228,18 @@ ice_ptp_read_port_capture(struct ice_hw *hw, u8 port, u64 *tx_ts, u64 *rx_ts) } /** - * ice_ptp_one_port_cmd - Prepare a single PHY port for a timer command + * ice_ptp_write_port_cmd_e822 - Prepare a single PHY port for a timer command * @hw: pointer to HW struct * @port: Port to which cmd has to be sent * @cmd: Command to be sent to the port * * Prepare the requested port for an upcoming timer sync command. * - * Note there is no equivalent of this operation on E810, as that device - * always handles all external PHYs internally. + * Do not use this function directly. If you want to configure exactly one + * port, use ice_ptp_one_port_cmd() instead. */ static int -ice_ptp_one_port_cmd(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd) +ice_ptp_write_port_cmd_e822(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd) { u32 cmd_val, val; u8 tmr_idx; @@ -1261,6 +1263,8 @@ ice_ptp_one_port_cmd(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd) case ADJ_TIME_AT_TIME: cmd_val |= PHY_CMD_ADJ_TIME_AT_TIME; break; + case ICE_PTP_NOP: + break; } /* Tx case */ @@ -1307,6 +1311,39 @@ ice_ptp_one_port_cmd(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd) } /** + * ice_ptp_one_port_cmd - Prepare one port for a timer command + * @hw: pointer to the HW struct + * @configured_port: the port to configure with configured_cmd + * @configured_cmd: timer command to prepare on the configured_port + * + * Prepare the configured_port for the configured_cmd, and prepare all other + * ports for ICE_PTP_NOP. This causes the configured_port to execute the + * desired command while all other ports perform no operation. + */ +static int +ice_ptp_one_port_cmd(struct ice_hw *hw, u8 configured_port, + enum ice_ptp_tmr_cmd configured_cmd) +{ + u8 port; + + for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) { + enum ice_ptp_tmr_cmd cmd; + int err; + + if (port == configured_port) + cmd = configured_cmd; + else + cmd = ICE_PTP_NOP; + + err = ice_ptp_write_port_cmd_e822(hw, port, cmd); + if (err) + return err; + } + + return 0; +} + +/** * ice_ptp_port_cmd_e822 - Prepare all ports for a timer command * @hw: pointer to the HW struct * @cmd: timer command to prepare @@ -1322,7 +1359,7 @@ ice_ptp_port_cmd_e822(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) { int err; - err = ice_ptp_one_port_cmd(hw, port, cmd); + err = ice_ptp_write_port_cmd_e822(hw, port, cmd); if (err) return err; } @@ -2252,6 +2289,9 @@ static int ice_sync_phy_timer_e822(struct ice_hw *hw, u8 port) if (err) goto err_unlock; + /* Do not perform any action on the main timer */ + ice_ptp_src_cmd(hw, ICE_PTP_NOP); + /* Issue the sync to activate the time adjustment */ ice_ptp_exec_tmr_cmd(hw); @@ -2372,6 +2412,9 @@ int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port) if (err) return err; + /* Do not perform any action on the main timer */ + ice_ptp_src_cmd(hw, ICE_PTP_NOP); + ice_ptp_exec_tmr_cmd(hw); err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val); @@ -2847,6 +2890,8 @@ static int ice_ptp_port_cmd_e810(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) case ADJ_TIME_AT_TIME: cmd_val = GLTSYN_CMD_ADJ_INIT_TIME; break; + case ICE_PTP_NOP: + return 0; } /* Read, modify, write */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 3b68cb91bd81..096685237ca6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -9,7 +9,8 @@ enum ice_ptp_tmr_cmd { INIT_INCVAL, ADJ_TIME, ADJ_TIME_AT_TIME, - READ_TIME + READ_TIME, + ICE_PTP_NOP, }; enum ice_ptp_serdes { diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index fd1f8b0ad0ab..e30e12321abd 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -298,14 +298,6 @@ static int ice_repr_add(struct ice_vf *vf) if (!repr) return -ENOMEM; -#ifdef CONFIG_ICE_SWITCHDEV - repr->mac_rule = kzalloc(sizeof(*repr->mac_rule), GFP_KERNEL); - if (!repr->mac_rule) { - err = -ENOMEM; - goto err_alloc_rule; - } -#endif - repr->netdev = alloc_etherdev(sizeof(struct ice_netdev_priv)); if (!repr->netdev) { err = -ENOMEM; @@ -351,11 +343,6 @@ err_alloc_q_vector: free_netdev(repr->netdev); repr->netdev = NULL; err_alloc: -#ifdef CONFIG_ICE_SWITCHDEV - kfree(repr->mac_rule); - repr->mac_rule = NULL; -err_alloc_rule: -#endif kfree(repr); vf->repr = NULL; return err; @@ -376,10 +363,6 @@ static void ice_repr_rem(struct ice_vf *vf) ice_devlink_destroy_vf_port(vf); free_netdev(vf->repr->netdev); vf->repr->netdev = NULL; -#ifdef CONFIG_ICE_SWITCHDEV - kfree(vf->repr->mac_rule); - vf->repr->mac_rule = NULL; -#endif kfree(vf->repr); vf->repr = NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h index 378a45bfa256..9c2a6f496b3b 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.h +++ b/drivers/net/ethernet/intel/ice/ice_repr.h @@ -13,9 +13,8 @@ struct ice_repr { struct net_device *netdev; struct metadata_dst *dst; #ifdef CONFIG_ICE_SWITCHDEV - /* info about slow path MAC rule */ - struct ice_rule_query_data *mac_rule; - u8 rule_added; + /* info about slow path rule */ + struct ice_rule_query_data sp_rule; #endif }; diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index b7682de0ae05..b664d60fd037 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -358,10 +358,7 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node) node->sibling; } - /* leaf nodes have no children */ - if (node->children) - devm_kfree(ice_hw_to_dev(hw), node->children); - + devm_kfree(ice_hw_to_dev(hw), node->children); kfree(node->name); xa_erase(&pi->sched_node_ids, node->id); devm_kfree(ice_hw_to_dev(hw), node); @@ -859,10 +856,8 @@ void ice_sched_cleanup_all(struct ice_hw *hw) if (!hw) return; - if (hw->layer_info) { - devm_kfree(ice_hw_to_dev(hw), hw->layer_info); - hw->layer_info = NULL; - } + devm_kfree(ice_hw_to_dev(hw), hw->layer_info); + hw->layer_info = NULL; ice_sched_clear_port(hw->port_info); diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 588ad8696756..31314e7540f8 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -135,18 +135,9 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) */ static int ice_sriov_free_msix_res(struct ice_pf *pf) { - struct ice_res_tracker *res; - if (!pf) return -EINVAL; - res = pf->irq_tracker; - if (!res) - return -EINVAL; - - /* give back irq_tracker resources used */ - WARN_ON(pf->sriov_base_vector < res->num_entries); - pf->sriov_base_vector = 0; return 0; @@ -410,29 +401,6 @@ int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) } /** - * ice_get_max_valid_res_idx - Get the max valid resource index - * @res: pointer to the resource to find the max valid index for - * - * Start from the end of the ice_res_tracker and return right when we find the - * first res->list entry with the ICE_RES_VALID_BIT set. This function is only - * valid for SR-IOV because it is the only consumer that manipulates the - * res->end and this is always called when res->end is set to res->num_entries. - */ -static int ice_get_max_valid_res_idx(struct ice_res_tracker *res) -{ - int i; - - if (!res) - return -EINVAL; - - for (i = res->num_entries - 1; i >= 0; i--) - if (res->list[i] & ICE_RES_VALID_BIT) - return i; - - return 0; -} - -/** * ice_sriov_set_msix_res - Set any used MSIX resources * @pf: pointer to PF structure * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs @@ -450,7 +418,7 @@ static int ice_get_max_valid_res_idx(struct ice_res_tracker *res) static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) { u16 total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; - int vectors_used = pf->irq_tracker->num_entries; + int vectors_used = ice_get_max_used_msix_vector(pf); int sriov_base_vector; sriov_base_vector = total_vectors - num_msix_needed; @@ -490,7 +458,7 @@ static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) */ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) { - int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker); + int vectors_used = ice_get_max_used_msix_vector(pf); u16 num_msix_per_vf, num_txq, num_rxq, avail_qs; int msix_avail_per_vf, msix_avail_for_sriov; struct device *dev = ice_pf_to_dev(pf); @@ -501,12 +469,9 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) if (!num_vfs) return -EINVAL; - if (max_valid_res_idx < 0) - return -ENOSPC; - /* determine MSI-X resources per VF */ msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors - - pf->irq_tracker->num_entries; + vectors_used; msix_avail_per_vf = msix_avail_for_sriov / num_vfs; if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) { num_msix_per_vf = ICE_NUM_VF_MSIX_MED; @@ -871,7 +836,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) int ret; /* Disable global interrupt 0 so we don't try to handle the VFLR. */ - wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), + wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index), ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); set_bit(ICE_OICR_INTR_DIS, pf->state); ice_flush(hw); @@ -940,14 +905,13 @@ err_unroll_intr: */ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs) { - int pre_existing_vfs = pci_num_vf(pf->pdev); struct device *dev = ice_pf_to_dev(pf); int err; - if (pre_existing_vfs && pre_existing_vfs != num_vfs) + if (!num_vfs) { ice_free_vfs(pf); - else if (pre_existing_vfs && pre_existing_vfs == num_vfs) return 0; + } if (num_vfs > pf->vfs.num_supported) { dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n", @@ -1014,8 +978,6 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!num_vfs) { if (!pci_vfs_assigned(pdev)) { ice_free_vfs(pf); - if (pf->lag) - ice_enable_lag(pf->lag); return 0; } @@ -1027,8 +989,6 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) if (err) return err; - if (pf->lag) - ice_disable_lag(pf->lag); return num_vfs; } @@ -1171,7 +1131,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) if (!vf) return -EINVAL; - ret = ice_check_vf_ready_for_reset(vf); + ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; @@ -1286,7 +1246,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) goto out_put_vf; } - ret = ice_check_vf_ready_for_reset(vf); + ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; @@ -1340,7 +1300,7 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) return -EOPNOTSUPP; } - ret = ice_check_vf_ready_for_reset(vf); + ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; @@ -1653,7 +1613,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, if (!vf) return -EINVAL; - ret = ice_check_vf_ready_for_reset(vf); + ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 46b36851af46..6db4ca7978cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1636,21 +1636,16 @@ ice_save_vsi_ctx(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi) */ static void ice_clear_vsi_q_ctx(struct ice_hw *hw, u16 vsi_handle) { - struct ice_vsi_ctx *vsi; + struct ice_vsi_ctx *vsi = ice_get_vsi_ctx(hw, vsi_handle); u8 i; - vsi = ice_get_vsi_ctx(hw, vsi_handle); if (!vsi) return; ice_for_each_traffic_class(i) { - if (vsi->lan_q_ctx[i]) { - devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]); - vsi->lan_q_ctx[i] = NULL; - } - if (vsi->rdma_q_ctx[i]) { - devm_kfree(ice_hw_to_dev(hw), vsi->rdma_q_ctx[i]); - vsi->rdma_q_ctx[i] = NULL; - } + devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]); + vsi->lan_q_ctx[i] = NULL; + devm_kfree(ice_hw_to_dev(hw), vsi->rdma_q_ctx[i]); + vsi->rdma_q_ctx[i] = NULL; } } @@ -4540,6 +4535,11 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, return status; } +#define ICE_PROTOCOL_ENTRY(id, ...) { \ + .prot_type = id, \ + .offs = {__VA_ARGS__}, \ +} + /* This is mapping table entry that maps every word within a given protocol * structure to the real byte offset as per the specification of that * protocol header. @@ -4550,29 +4550,38 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, * structure is added to that union. */ static const struct ice_prot_ext_tbl_entry ice_prot_ext[ICE_PROTOCOL_LAST] = { - { ICE_MAC_OFOS, { 0, 2, 4, 6, 8, 10, 12 } }, - { ICE_MAC_IL, { 0, 2, 4, 6, 8, 10, 12 } }, - { ICE_ETYPE_OL, { 0 } }, - { ICE_ETYPE_IL, { 0 } }, - { ICE_VLAN_OFOS, { 2, 0 } }, - { ICE_IPV4_OFOS, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18 } }, - { ICE_IPV4_IL, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18 } }, - { ICE_IPV6_OFOS, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, - 26, 28, 30, 32, 34, 36, 38 } }, - { ICE_IPV6_IL, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, - 26, 28, 30, 32, 34, 36, 38 } }, - { ICE_TCP_IL, { 0, 2 } }, - { ICE_UDP_OF, { 0, 2 } }, - { ICE_UDP_ILOS, { 0, 2 } }, - { ICE_VXLAN, { 8, 10, 12, 14 } }, - { ICE_GENEVE, { 8, 10, 12, 14 } }, - { ICE_NVGRE, { 0, 2, 4, 6 } }, - { ICE_GTP, { 8, 10, 12, 14, 16, 18, 20, 22 } }, - { ICE_GTP_NO_PAY, { 8, 10, 12, 14 } }, - { ICE_PPPOE, { 0, 2, 4, 6 } }, - { ICE_L2TPV3, { 0, 2, 4, 6, 8, 10 } }, - { ICE_VLAN_EX, { 2, 0 } }, - { ICE_VLAN_IN, { 2, 0 } }, + ICE_PROTOCOL_ENTRY(ICE_MAC_OFOS, 0, 2, 4, 6, 8, 10, 12), + ICE_PROTOCOL_ENTRY(ICE_MAC_IL, 0, 2, 4, 6, 8, 10, 12), + ICE_PROTOCOL_ENTRY(ICE_ETYPE_OL, 0), + ICE_PROTOCOL_ENTRY(ICE_ETYPE_IL, 0), + ICE_PROTOCOL_ENTRY(ICE_VLAN_OFOS, 2, 0), + ICE_PROTOCOL_ENTRY(ICE_IPV4_OFOS, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18), + ICE_PROTOCOL_ENTRY(ICE_IPV4_IL, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18), + ICE_PROTOCOL_ENTRY(ICE_IPV6_OFOS, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, + 20, 22, 24, 26, 28, 30, 32, 34, 36, 38), + ICE_PROTOCOL_ENTRY(ICE_IPV6_IL, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, + 22, 24, 26, 28, 30, 32, 34, 36, 38), + ICE_PROTOCOL_ENTRY(ICE_TCP_IL, 0, 2), + ICE_PROTOCOL_ENTRY(ICE_UDP_OF, 0, 2), + ICE_PROTOCOL_ENTRY(ICE_UDP_ILOS, 0, 2), + ICE_PROTOCOL_ENTRY(ICE_VXLAN, 8, 10, 12, 14), + ICE_PROTOCOL_ENTRY(ICE_GENEVE, 8, 10, 12, 14), + ICE_PROTOCOL_ENTRY(ICE_NVGRE, 0, 2, 4, 6), + ICE_PROTOCOL_ENTRY(ICE_GTP, 8, 10, 12, 14, 16, 18, 20, 22), + ICE_PROTOCOL_ENTRY(ICE_GTP_NO_PAY, 8, 10, 12, 14), + ICE_PROTOCOL_ENTRY(ICE_PPPOE, 0, 2, 4, 6), + ICE_PROTOCOL_ENTRY(ICE_L2TPV3, 0, 2, 4, 6, 8, 10), + ICE_PROTOCOL_ENTRY(ICE_VLAN_EX, 2, 0), + ICE_PROTOCOL_ENTRY(ICE_VLAN_IN, 2, 0), + ICE_PROTOCOL_ENTRY(ICE_HW_METADATA, + ICE_SOURCE_PORT_MDID_OFFSET, + ICE_PTYPE_MDID_OFFSET, + ICE_PACKET_LENGTH_MDID_OFFSET, + ICE_SOURCE_VSI_MDID_OFFSET, + ICE_PKT_VLAN_MDID_OFFSET, + ICE_PKT_TUNNEL_MDID_OFFSET, + ICE_PKT_TCP_MDID_OFFSET, + ICE_PKT_ERROR_MDID_OFFSET), }; static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { @@ -4597,6 +4606,7 @@ static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { { ICE_L2TPV3, ICE_L2TPV3_HW }, { ICE_VLAN_EX, ICE_VLAN_OF_HW }, { ICE_VLAN_IN, ICE_VLAN_OL_HW }, + { ICE_HW_METADATA, ICE_META_DATA_ID_HW }, }; /** @@ -5255,71 +5265,6 @@ ice_create_recipe_group(struct ice_hw *hw, struct ice_sw_recipe *rm, return status; } -/** - * ice_tun_type_match_word - determine if tun type needs a match mask - * @tun_type: tunnel type - * @mask: mask to be used for the tunnel - */ -static bool ice_tun_type_match_word(enum ice_sw_tunnel_type tun_type, u16 *mask) -{ - switch (tun_type) { - case ICE_SW_TUN_GENEVE: - case ICE_SW_TUN_VXLAN: - case ICE_SW_TUN_NVGRE: - case ICE_SW_TUN_GTPU: - case ICE_SW_TUN_GTPC: - *mask = ICE_TUN_FLAG_MASK; - return true; - - default: - *mask = 0; - return false; - } -} - -/** - * ice_add_special_words - Add words that are not protocols, such as metadata - * @rinfo: other information regarding the rule e.g. priority and action info - * @lkup_exts: lookup word structure - * @dvm_ena: is double VLAN mode enabled - */ -static int -ice_add_special_words(struct ice_adv_rule_info *rinfo, - struct ice_prot_lkup_ext *lkup_exts, bool dvm_ena) -{ - u16 mask; - - /* If this is a tunneled packet, then add recipe index to match the - * tunnel bit in the packet metadata flags. - */ - if (ice_tun_type_match_word(rinfo->tun_type, &mask)) { - if (lkup_exts->n_val_words < ICE_MAX_CHAIN_WORDS) { - u8 word = lkup_exts->n_val_words++; - - lkup_exts->fv_words[word].prot_id = ICE_META_DATA_ID_HW; - lkup_exts->fv_words[word].off = ICE_TUN_FLAG_MDID_OFF; - lkup_exts->field_mask[word] = mask; - } else { - return -ENOSPC; - } - } - - if (rinfo->vlan_type != 0 && dvm_ena) { - if (lkup_exts->n_val_words < ICE_MAX_CHAIN_WORDS) { - u8 word = lkup_exts->n_val_words++; - - lkup_exts->fv_words[word].prot_id = ICE_META_DATA_ID_HW; - lkup_exts->fv_words[word].off = ICE_VLAN_FLAG_MDID_OFF; - lkup_exts->field_mask[word] = - ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK; - } else { - return -ENOSPC; - } - } - - return 0; -} - /* ice_get_compat_fv_bitmap - Get compatible field vector bitmap for rule * @hw: pointer to hardware structure * @rinfo: other information regarding the rule e.g. priority and action info @@ -5433,13 +5378,6 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (status) goto err_unroll; - /* Create any special protocol/offset pairs, such as looking at tunnel - * bits by extracting metadata - */ - status = ice_add_special_words(rinfo, lkup_exts, ice_is_dvm_ena(hw)); - if (status) - goto err_unroll; - /* Group match words into recipes using preferred recipe grouping * criteria. */ @@ -5525,9 +5463,7 @@ err_unroll: devm_kfree(ice_hw_to_dev(hw), fvit); } - if (rm->root_buf) - devm_kfree(ice_hw_to_dev(hw), rm->root_buf); - + devm_kfree(ice_hw_to_dev(hw), rm->root_buf); kfree(rm); err_free_lkup_exts: @@ -5725,6 +5661,10 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, * was already checked when search for the dummy packet */ type = lkups[i].type; + /* metadata isn't present in the packet */ + if (type == ICE_HW_METADATA) + continue; + for (j = 0; offsets[j].type != ICE_PROTOCOL_LAST; j++) { if (type == offsets[j].type) { offset = offsets[j].offset; @@ -5860,16 +5800,21 @@ ice_fill_adv_packet_tun(struct ice_hw *hw, enum ice_sw_tunnel_type tun_type, /** * ice_fill_adv_packet_vlan - fill dummy packet with VLAN tag type + * @hw: pointer to hw structure * @vlan_type: VLAN tag type * @pkt: dummy packet to fill in * @offsets: offset info for the dummy packet */ static int -ice_fill_adv_packet_vlan(u16 vlan_type, u8 *pkt, +ice_fill_adv_packet_vlan(struct ice_hw *hw, u16 vlan_type, u8 *pkt, const struct ice_dummy_pkt_offsets *offsets) { u16 i; + /* Check if there is something to do */ + if (!vlan_type || !ice_is_dvm_ena(hw)) + return 0; + /* Find VLAN header and insert VLAN TPID */ for (i = 0; offsets[i].type != ICE_PROTOCOL_LAST; i++) { if (offsets[i].type == ICE_VLAN_OFOS || @@ -5888,6 +5833,15 @@ ice_fill_adv_packet_vlan(u16 vlan_type, u8 *pkt, return -EIO; } +static bool ice_rules_equal(const struct ice_adv_rule_info *first, + const struct ice_adv_rule_info *second) +{ + return first->sw_act.flag == second->sw_act.flag && + first->tun_type == second->tun_type && + first->vlan_type == second->vlan_type && + first->src_vsi == second->src_vsi; +} + /** * ice_find_adv_rule_entry - Search a rule entry * @hw: pointer to the hardware structure @@ -5921,9 +5875,7 @@ ice_find_adv_rule_entry(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, lkups_matched = false; break; } - if (rinfo->sw_act.flag == list_itr->rule_info.sw_act.flag && - rinfo->tun_type == list_itr->rule_info.tun_type && - rinfo->vlan_type == list_itr->rule_info.vlan_type && + if (ice_rules_equal(rinfo, &list_itr->rule_info) && lkups_matched) return list_itr; } @@ -6039,6 +5991,26 @@ ice_adv_add_update_vsi_list(struct ice_hw *hw, return status; } +void ice_rule_add_tunnel_metadata(struct ice_adv_lkup_elem *lkup) +{ + lkup->type = ICE_HW_METADATA; + lkup->m_u.metadata.flags[ICE_PKT_FLAGS_TUNNEL] = + cpu_to_be16(ICE_PKT_TUNNEL_MASK); +} + +void ice_rule_add_vlan_metadata(struct ice_adv_lkup_elem *lkup) +{ + lkup->type = ICE_HW_METADATA; + lkup->m_u.metadata.flags[ICE_PKT_FLAGS_VLAN] = + cpu_to_be16(ICE_PKT_VLAN_MASK); +} + +void ice_rule_add_src_vsi_metadata(struct ice_adv_lkup_elem *lkup) +{ + lkup->type = ICE_HW_METADATA; + lkup->m_u.metadata.source_vsi = cpu_to_be16(ICE_MDID_SOURCE_VSI_MASK); +} + /** * ice_add_adv_rule - helper function to create an advanced switch rule * @hw: pointer to the hardware structure @@ -6120,7 +6092,10 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI) rinfo->sw_act.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); - if (rinfo->sw_act.flag & ICE_FLTR_TX) + + if (rinfo->src_vsi) + rinfo->sw_act.src = ice_get_hw_vsi_num(hw, rinfo->src_vsi); + else rinfo->sw_act.src = ice_get_hw_vsi_num(hw, vsi_handle); status = ice_add_adv_recipe(hw, lkups, lkups_cnt, rinfo, &rid); @@ -6189,19 +6164,20 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, goto err_ice_add_adv_rule; } - /* set the rule LOOKUP type based on caller specified 'Rx' - * instead of hardcoding it to be either LOOKUP_TX/RX + /* If there is no matching criteria for direction there + * is only one difference between Rx and Tx: + * - get switch id base on VSI number from source field (Tx) + * - get switch id base on port number (Rx) * - * for 'Rx' set the source to be the port number - * for 'Tx' set the source to be the source HW VSI number (determined - * by caller) + * If matching on direction metadata is chose rule direction is + * extracted from type value set here. */ - if (rinfo->rx) { - s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); - s_rule->src = cpu_to_le16(hw->port_info->lport); - } else { + if (rinfo->sw_act.flag & ICE_FLTR_TX) { s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX); s_rule->src = cpu_to_le16(rinfo->sw_act.src); + } else { + s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); + s_rule->src = cpu_to_le16(hw->port_info->lport); } s_rule->recipe_id = cpu_to_le16(rid); @@ -6211,22 +6187,16 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (status) goto err_ice_add_adv_rule; - if (rinfo->tun_type != ICE_NON_TUN && - rinfo->tun_type != ICE_SW_TUN_AND_NON_TUN) { - status = ice_fill_adv_packet_tun(hw, rinfo->tun_type, - s_rule->hdr_data, - profile->offsets); - if (status) - goto err_ice_add_adv_rule; - } + status = ice_fill_adv_packet_tun(hw, rinfo->tun_type, s_rule->hdr_data, + profile->offsets); + if (status) + goto err_ice_add_adv_rule; - if (rinfo->vlan_type != 0 && ice_is_dvm_ena(hw)) { - status = ice_fill_adv_packet_vlan(rinfo->vlan_type, - s_rule->hdr_data, - profile->offsets); - if (status) - goto err_ice_add_adv_rule; - } + status = ice_fill_adv_packet_vlan(hw, rinfo->vlan_type, + s_rule->hdr_data, + profile->offsets); + if (status) + goto err_ice_add_adv_rule; status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule, rule_buf_sz, 1, ice_aqc_opc_add_sw_rules, @@ -6469,13 +6439,6 @@ ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, return -EIO; } - /* Create any special protocol/offset pairs, such as looking at tunnel - * bits by extracting metadata - */ - status = ice_add_special_words(rinfo, &lkup_exts, ice_is_dvm_ena(hw)); - if (status) - return status; - rid = ice_find_recp(hw, &lkup_exts, rinfo->tun_type); /* If did not find a recipe that match the existing criteria */ if (rid == ICE_MAX_NUM_RECIPES) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 68d8e8a6a189..c84b56fe84a5 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -10,7 +10,6 @@ #define ICE_DFLT_VSI_INVAL 0xff #define ICE_FLTR_RX BIT(0) #define ICE_FLTR_TX BIT(1) -#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX) #define ICE_VSI_INVAL_ID 0xffff #define ICE_INVAL_Q_HANDLE 0xFFFF @@ -187,12 +186,13 @@ struct ice_adv_rule_flags_info { }; struct ice_adv_rule_info { + /* Store metadata values in rule info */ enum ice_sw_tunnel_type tun_type; - struct ice_sw_act_ctrl sw_act; - u32 priority; - u8 rx; /* true means LOOKUP_RX otherwise LOOKUP_TX */ - u16 fltr_rule_id; u16 vlan_type; + u16 fltr_rule_id; + u32 priority; + u16 src_vsi; + struct ice_sw_act_ctrl sw_act; struct ice_adv_rule_flags_info flags_info; }; @@ -342,6 +342,9 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, u16 counter_id); /* Switch/bridge related commands */ +void ice_rule_add_tunnel_metadata(struct ice_adv_lkup_elem *lkup); +void ice_rule_add_vlan_metadata(struct ice_adv_lkup_elem *lkup); +void ice_rule_add_src_vsi_metadata(struct ice_adv_lkup_elem *lkup); int ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, struct ice_adv_rule_info *rinfo, diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index d1a31f236d26..4a34ef5f58d3 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -54,6 +54,10 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, if (flags & (ICE_TC_FLWR_FIELD_VLAN | ICE_TC_FLWR_FIELD_VLAN_PRIO)) lkups_cnt++; + /* is VLAN TPID specified */ + if (flags & ICE_TC_FLWR_FIELD_VLAN_TPID) + lkups_cnt++; + /* is CVLAN specified? */ if (flags & (ICE_TC_FLWR_FIELD_CVLAN | ICE_TC_FLWR_FIELD_CVLAN_PRIO)) lkups_cnt++; @@ -80,6 +84,10 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, ICE_TC_FLWR_FIELD_SRC_L4_PORT)) lkups_cnt++; + /* matching for tunneled packets in metadata */ + if (fltr->tunnel_type != TNL_LAST) + lkups_cnt++; + return lkups_cnt; } @@ -320,6 +328,10 @@ ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr, i++; } + /* always fill matching on tunneled packets in metadata */ + ice_rule_add_tunnel_metadata(&list[i]); + i++; + return i; } @@ -390,10 +402,6 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, /* copy VLAN info */ if (flags & (ICE_TC_FLWR_FIELD_VLAN | ICE_TC_FLWR_FIELD_VLAN_PRIO)) { - vlan_tpid = be16_to_cpu(headers->vlan_hdr.vlan_tpid); - rule_info->vlan_type = - ice_check_supported_vlan_tpid(vlan_tpid); - if (flags & ICE_TC_FLWR_FIELD_CVLAN) list[i].type = ICE_VLAN_EX; else @@ -418,6 +426,15 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, i++; } + if (flags & ICE_TC_FLWR_FIELD_VLAN_TPID) { + vlan_tpid = be16_to_cpu(headers->vlan_hdr.vlan_tpid); + rule_info->vlan_type = + ice_check_supported_vlan_tpid(vlan_tpid); + + ice_rule_add_vlan_metadata(&list[i]); + i++; + } + if (flags & (ICE_TC_FLWR_FIELD_CVLAN | ICE_TC_FLWR_FIELD_CVLAN_PRIO)) { list[i].type = ICE_VLAN_IN; @@ -698,12 +715,10 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) if (fltr->direction == ICE_ESWITCH_FLTR_INGRESS) { rule_info.sw_act.flag |= ICE_FLTR_RX; rule_info.sw_act.src = hw->pf_id; - rule_info.rx = true; rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE; } else { rule_info.sw_act.flag |= ICE_FLTR_TX; rule_info.sw_act.src = vsi->idx; - rule_info.rx = false; rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE; } @@ -735,17 +750,16 @@ exit: /** * ice_locate_vsi_using_queue - locate VSI using queue (forward to queue action) * @vsi: Pointer to VSI - * @tc_fltr: Pointer to tc_flower_filter + * @queue: Queue index * - * Locate the VSI using specified queue. When ADQ is not enabled, always - * return input VSI, otherwise locate corresponding VSI based on per channel - * offset and qcount + * Locate the VSI using specified "queue". When ADQ is not enabled, + * always return input VSI, otherwise locate corresponding + * VSI based on per channel "offset" and "qcount" */ -static struct ice_vsi * -ice_locate_vsi_using_queue(struct ice_vsi *vsi, - struct ice_tc_flower_fltr *tc_fltr) +struct ice_vsi * +ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue) { - int num_tc, tc, queue; + int num_tc, tc; /* if ADQ is not active, passed VSI is the candidate VSI */ if (!ice_is_adq_active(vsi->back)) @@ -755,7 +769,6 @@ ice_locate_vsi_using_queue(struct ice_vsi *vsi, * upon queue number) */ num_tc = vsi->mqprio_qopt.qopt.num_tc; - queue = tc_fltr->action.fwd.q.queue; for (tc = 0; tc < num_tc; tc++) { int qcount = vsi->mqprio_qopt.qopt.count[tc]; @@ -797,6 +810,7 @@ ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr) struct ice_pf *pf = vsi->back; struct device *dev; u32 tc_class; + int q; dev = ice_pf_to_dev(pf); @@ -825,7 +839,8 @@ ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr) /* Determine destination VSI even though the action is * FWD_TO_QUEUE, because QUEUE is associated with VSI */ - dest_vsi = tc_fltr->dest_vsi; + q = tc_fltr->action.fwd.q.queue; + dest_vsi = ice_locate_vsi_using_queue(vsi, q); break; default: dev_err(dev, @@ -910,7 +925,6 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, rule_info.sw_act.vsi_handle = dest_vsi->idx; rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI; rule_info.sw_act.src = hw->pf_id; - rule_info.rx = true; dev_dbg(dev, "add switch rule for TC:%u vsi_idx:%u, lkups_cnt:%u\n", tc_fltr->action.fwd.tc.tc_class, rule_info.sw_act.vsi_handle, lkups_cnt); @@ -921,7 +935,6 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, rule_info.sw_act.vsi_handle = dest_vsi->idx; rule_info.priority = ICE_SWITCH_FLTR_PRIO_QUEUE; rule_info.sw_act.src = hw->pf_id; - rule_info.rx = true; dev_dbg(dev, "add switch rule action to forward to queue:%u (HW queue %u), lkups_cnt:%u\n", tc_fltr->action.fwd.q.queue, tc_fltr->action.fwd.q.hw_queue, lkups_cnt); @@ -929,7 +942,6 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, case ICE_DROP_PACKET: rule_info.sw_act.flag |= ICE_FLTR_RX; rule_info.sw_act.src = hw->pf_id; - rule_info.rx = true; rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI; break; default: @@ -1460,8 +1472,10 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, VLAN_PRIO_MASK); } - if (match.mask->vlan_tpid) + if (match.mask->vlan_tpid) { headers->vlan_hdr.vlan_tpid = match.key->vlan_tpid; + fltr->flags |= ICE_TC_FLWR_FIELD_VLAN_TPID; + } } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { @@ -1702,7 +1716,7 @@ ice_tc_forward_to_queue(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr, /* If ADQ is configured, and the queue belongs to ADQ VSI, then prepare * ADQ switch filter */ - ch_vsi = ice_locate_vsi_using_queue(vsi, fltr); + ch_vsi = ice_locate_vsi_using_queue(vsi, fltr->action.fwd.q.queue); if (!ch_vsi) return -EINVAL; fltr->dest_vsi = ch_vsi; diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index 8d5e22ac7023..65d387163a46 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -33,6 +33,7 @@ #define ICE_TC_FLWR_FIELD_L2TPV3_SESSID BIT(26) #define ICE_TC_FLWR_FIELD_VLAN_PRIO BIT(27) #define ICE_TC_FLWR_FIELD_CVLAN_PRIO BIT(28) +#define ICE_TC_FLWR_FIELD_VLAN_TPID BIT(29) #define ICE_TC_FLOWER_MASK_32 0xFFFFFFFF @@ -203,6 +204,7 @@ static inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf) return pf->num_dmac_chnl_fltrs; } +struct ice_vsi *ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue); int ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, struct flow_cls_offload *cls_flower); diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index bf74a2f3a4f8..ea3310be8354 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -186,25 +186,6 @@ int ice_check_vf_ready_for_cfg(struct ice_vf *vf) } /** - * ice_check_vf_ready_for_reset - check if VF is ready to be reset - * @vf: VF to check if it's ready to be reset - * - * The purpose of this function is to ensure that the VF is not in reset, - * disabled, and is both initialized and active, thus enabling us to safely - * initialize another reset. - */ -int ice_check_vf_ready_for_reset(struct ice_vf *vf) -{ - int ret; - - ret = ice_check_vf_ready_for_cfg(vf); - if (!ret && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) - ret = -EAGAIN; - - return ret; -} - -/** * ice_trigger_vf_reset - Reset a VF on HW * @vf: pointer to the VF structure * @is_vflr: true if VFLR was issued, false if not @@ -631,11 +612,17 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) return 0; } + if (flags & ICE_VF_RESET_LOCK) + mutex_lock(&vf->cfg_lock); + else + lockdep_assert_held(&vf->cfg_lock); + if (ice_is_vf_disabled(vf)) { vsi = ice_get_vf_vsi(vf); if (!vsi) { dev_dbg(dev, "VF is already removed\n"); - return -EINVAL; + err = -EINVAL; + goto out_unlock; } ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id); @@ -644,14 +631,9 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n", vf->vf_id); - return 0; + goto out_unlock; } - if (flags & ICE_VF_RESET_LOCK) - mutex_lock(&vf->cfg_lock); - else - lockdep_assert_held(&vf->cfg_lock); - /* Set VF disable bit state here, before triggering reset */ set_bit(ICE_VF_STATE_DIS, vf->vf_states); ice_trigger_vf_reset(vf, flags & ICE_VF_RESET_VFLR, false); @@ -689,8 +671,6 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) */ ice_vf_clear_all_promisc_modes(vf, vsi); - ice_eswitch_del_vf_mac_rule(vf); - ice_vf_fdir_exit(vf); ice_vf_fdir_init(vf); /* clean VF control VSI when resetting VF since it should be setup @@ -716,7 +696,6 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) } ice_eswitch_update_repr(vsi); - ice_eswitch_replay_vf_mac_rule(vf); /* if the VF has been reset allow it to come up again */ ice_mbx_clear_malvf(&vf->mbx_info); @@ -1329,3 +1308,35 @@ void ice_vf_set_initialized(struct ice_vf *vf) set_bit(ICE_VF_STATE_INIT, vf->vf_states); memset(&vf->vlan_v2_caps, 0, sizeof(vf->vlan_v2_caps)); } + +/** + * ice_get_vf_ctrl_vsi - Get first VF control VSI pointer + * @pf: the PF private structure + * @vsi: pointer to the VSI + * + * Return first found VF control VSI other than the vsi + * passed by parameter. This function is used to determine + * whether new resources have to be allocated for control VSI + * or they can be shared with existing one. + * + * Return found VF control VSI pointer other itself. Return + * NULL Otherwise. + * + */ +struct ice_vsi *ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi) +{ + struct ice_vsi *ctrl_vsi = NULL; + struct ice_vf *vf; + unsigned int bkt; + + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { + if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) { + ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx]; + break; + } + } + + rcu_read_unlock(); + return ctrl_vsi; +} diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index a38ef00a3679..48fea6fa0362 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -215,7 +215,6 @@ u16 ice_get_num_vfs(struct ice_pf *pf); struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf); bool ice_is_vf_disabled(struct ice_vf *vf); int ice_check_vf_ready_for_cfg(struct ice_vf *vf); -int ice_check_vf_ready_for_reset(struct ice_vf *vf); void ice_set_vf_state_dis(struct ice_vf *vf); bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf); void @@ -227,6 +226,7 @@ int ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m); int ice_reset_vf(struct ice_vf *vf, u32 flags); void ice_reset_all_vfs(struct ice_pf *pf); +struct ice_vsi *ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi); #else /* CONFIG_PCI_IOV */ static inline struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id) { @@ -291,6 +291,12 @@ static inline int ice_reset_vf(struct ice_vf *vf, u32 flags) static inline void ice_reset_all_vfs(struct ice_pf *pf) { } + +static inline struct ice_vsi * +ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi) +{ + return NULL; +} #endif /* !CONFIG_PCI_IOV */ #endif /* _ICE_VF_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index f4a524f80b11..dcf628b1fccd 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -3730,7 +3730,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg) for (i = 0; i < al->num_elements; i++) { u8 *mac_addr = al->list[i].addr; - int result; if (!is_unicast_ether_addr(mac_addr) || ether_addr_equal(mac_addr, vf->hw_lan_addr)) @@ -3742,13 +3741,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg) goto handle_mac_exit; } - result = ice_eswitch_add_vf_mac_rule(pf, vf, mac_addr); - if (result) { - dev_err(ice_pf_to_dev(pf), "Failed to add MAC %pM for VF %d\n, error %d\n", - mac_addr, vf->vf_id, result); - goto handle_mac_exit; - } - ice_vfhw_mac_add(vf, &al->list[i]); vf->num_mac++; break; @@ -3955,7 +3947,6 @@ error_handler: ice_vc_notify_vf_link_state(vf); break; case VIRTCHNL_OP_RESET_VF: - clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states); ops->reset_vf(vf); break; case VIRTCHNL_OP_ADD_ETH_ADDR: diff --git a/drivers/net/ethernet/intel/ice/ice_vlan_mode.c b/drivers/net/ethernet/intel/ice/ice_vlan_mode.c index bcda2e004807..1279c1ffe31c 100644 --- a/drivers/net/ethernet/intel/ice/ice_vlan_mode.c +++ b/drivers/net/ethernet/intel/ice/ice_vlan_mode.c @@ -219,7 +219,7 @@ static struct ice_update_recipe_lkup_idx_params ice_dvm_dflt_recipes[] = { .rid = ICE_SW_LKUP_VLAN, .fv_idx = ICE_PKT_FLAGS_0_TO_15_FV_IDX, .ignore_valid = false, - .mask = ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK, + .mask = ICE_PKT_VLAN_MASK, .mask_valid = true, .lkup_idx = ICE_SW_LKUP_VLAN_PKT_FLAGS_LKUP_IDX, }, diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index d1e489da7363..a7fe2b4ce655 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -90,7 +90,6 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, { struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; - int base = vsi->base_vector; u16 reg; u32 val; @@ -103,11 +102,9 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, wr32(hw, QINT_RQCTL(reg), val); if (q_vector) { - u16 v_idx = q_vector->v_idx; - wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0); ice_flush(hw); - synchronize_irq(pf->msix_entries[v_idx + base].vector); + synchronize_irq(q_vector->irq.virq); } } diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 015b78144114..a2b759531cb7 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -34,11 +34,11 @@ struct igb_adapter; /* TX/RX descriptor defines */ #define IGB_DEFAULT_TXD 256 #define IGB_DEFAULT_TX_WORK 128 -#define IGB_MIN_TXD 80 +#define IGB_MIN_TXD 64 #define IGB_MAX_TXD 4096 #define IGB_DEFAULT_RXD 256 -#define IGB_MIN_RXD 80 +#define IGB_MIN_RXD 64 #define IGB_MAX_RXD 4096 #define IGB_DEFAULT_ITR 3 /* dynamic */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index bb3db387d49c..12f106b3a878 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -183,11 +183,13 @@ static int igb_resume(struct device *); static int igb_runtime_suspend(struct device *dev); static int igb_runtime_resume(struct device *dev); static int igb_runtime_idle(struct device *dev); +#ifdef CONFIG_PM static const struct dev_pm_ops igb_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume) SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume, igb_runtime_idle) }; +#endif static void igb_shutdown(struct pci_dev *); static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs); #ifdef CONFIG_IGB_DCA @@ -3825,8 +3827,11 @@ static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs, bool reinit) } /* only call pci_enable_sriov() if no VFs are allocated already */ - if (!old_vfs) + if (!old_vfs) { err = pci_enable_sriov(pdev, adapter->vfs_allocated_count); + if (err) + goto err_out; + } goto out; @@ -3931,8 +3936,9 @@ static void igb_probe_vfs(struct igb_adapter *adapter) struct pci_dev *pdev = adapter->pdev; struct e1000_hw *hw = &adapter->hw; - /* Virtualization features not supported on i210 family. */ - if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) + /* Virtualization features not supported on i210 and 82580 family. */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211) || + (hw->mac.type == e1000_82580)) return; /* Of the below we really only want the effect of getting @@ -4812,6 +4818,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, static void igb_set_rx_buffer_len(struct igb_adapter *adapter, struct igb_ring *rx_ring) { +#if (PAGE_SIZE < 8192) + struct e1000_hw *hw = &adapter->hw; +#endif + /* set build_skb and buffer size flags */ clear_ring_build_skb_enabled(rx_ring); clear_ring_uses_large_buffer(rx_ring); @@ -4822,10 +4832,9 @@ static void igb_set_rx_buffer_len(struct igb_adapter *adapter, set_ring_build_skb_enabled(rx_ring); #if (PAGE_SIZE < 8192) - if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB) - return; - - set_ring_uses_large_buffer(rx_ring); + if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB || + rd32(E1000_RCTL) & E1000_RCTL_SBP) + set_ring_uses_large_buffer(rx_ring); #endif } @@ -9585,6 +9594,11 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); + if (state == pci_channel_io_normal) { + dev_warn(&pdev->dev, "Non-correctable non-fatal error reported.\n"); + return PCI_ERS_RESULT_CAN_RECOVER; + } + netif_device_detach(netdev); if (state == pci_channel_io_perm_failure) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 405886ee5261..319c544b9f04 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -1385,18 +1385,6 @@ void igb_ptp_init(struct igb_adapter *adapter) return; } - spin_lock_init(&adapter->tmreg_lock); - INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); - - if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK) - INIT_DELAYED_WORK(&adapter->ptp_overflow_work, - igb_ptp_overflow_check); - - adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; - adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; - - igb_ptp_reset(adapter); - adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, &adapter->pdev->dev); if (IS_ERR(adapter->ptp_clock)) { @@ -1406,6 +1394,18 @@ void igb_ptp_init(struct igb_adapter *adapter) dev_info(&adapter->pdev->dev, "added PHC on %s\n", adapter->netdev->name); adapter->ptp_flags |= IGB_PTP_ENABLED; + + spin_lock_init(&adapter->tmreg_lock); + INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); + + if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK) + INIT_DELAYED_WORK(&adapter->ptp_overflow_work, + igb_ptp_overflow_check); + + adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; + adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; + + igb_ptp_reset(adapter); } } diff --git a/drivers/net/ethernet/intel/igbvf/igbvf.h b/drivers/net/ethernet/intel/igbvf/igbvf.h index 57d39ee00b58..7b83678ba83a 100644 --- a/drivers/net/ethernet/intel/igbvf/igbvf.h +++ b/drivers/net/ethernet/intel/igbvf/igbvf.h @@ -39,11 +39,11 @@ enum latency_range { /* Tx/Rx descriptor defines */ #define IGBVF_DEFAULT_TXD 256 #define IGBVF_MAX_TXD 4096 -#define IGBVF_MIN_TXD 80 +#define IGBVF_MIN_TXD 64 #define IGBVF_DEFAULT_RXD 256 #define IGBVF_MAX_RXD 4096 -#define IGBVF_MIN_RXD 80 +#define IGBVF_MIN_RXD 64 #define IGBVF_MIN_ITR_USECS 10 /* 100000 irq/sec */ #define IGBVF_MAX_ITR_USECS 10000 /* 100 irq/sec */ diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 34aebf00a512..b4077c3f62ed 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -13,6 +13,8 @@ #include <linux/ptp_clock_kernel.h> #include <linux/timecounter.h> #include <linux/net_tstamp.h> +#include <linux/bitfield.h> +#include <linux/hrtimer.h> #include "igc_hw.h" @@ -100,6 +102,8 @@ struct igc_ring { u32 start_time; u32 end_time; u32 max_sdu; + bool oper_gate_closed; /* Operating gate. True if the TX Queue is closed */ + bool admin_gate_closed; /* Future gate. True if the TX Queue will be closed */ /* CBS parameters */ bool cbs_enable; /* indicates if CBS is enabled */ @@ -159,6 +163,7 @@ struct igc_adapter { struct timer_list watchdog_timer; struct timer_list dma_err_timer; struct timer_list phy_info_timer; + struct hrtimer hrtimer; u32 wol; u32 en_mng_pt; @@ -183,10 +188,17 @@ struct igc_adapter { u32 max_frame_size; u32 min_frame_size; + int tc_setup_type; ktime_t base_time; ktime_t cycle_time; - bool qbv_enable; + bool taprio_offload_enable; u32 qbv_config_change_errors; + bool qbv_transition; + unsigned int qbv_count; + /* Access to oper_gate_closed, admin_gate_closed and qbv_transition + * are protected by the qbv_tx_lock. + */ + spinlock_t qbv_tx_lock; /* OS defined structs */ struct pci_dev *pdev; @@ -228,7 +240,10 @@ struct igc_adapter { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; - struct work_struct ptp_tx_work; + /* Access to ptp_tx_skb and ptp_tx_start are protected by the + * ptp_tx_lock. + */ + spinlock_t ptp_tx_lock; struct sk_buff *ptp_tx_skb; struct hwtstamp_config tstamp_config; unsigned long ptp_tx_start; @@ -311,6 +326,33 @@ extern char igc_driver_name[]; #define IGC_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 #define IGC_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 +/* RX-desc Write-Back format RSS Type's */ +enum igc_rss_type_num { + IGC_RSS_TYPE_NO_HASH = 0, + IGC_RSS_TYPE_HASH_TCP_IPV4 = 1, + IGC_RSS_TYPE_HASH_IPV4 = 2, + IGC_RSS_TYPE_HASH_TCP_IPV6 = 3, + IGC_RSS_TYPE_HASH_IPV6_EX = 4, + IGC_RSS_TYPE_HASH_IPV6 = 5, + IGC_RSS_TYPE_HASH_TCP_IPV6_EX = 6, + IGC_RSS_TYPE_HASH_UDP_IPV4 = 7, + IGC_RSS_TYPE_HASH_UDP_IPV6 = 8, + IGC_RSS_TYPE_HASH_UDP_IPV6_EX = 9, + IGC_RSS_TYPE_MAX = 10, +}; +#define IGC_RSS_TYPE_MAX_TABLE 16 +#define IGC_RSS_TYPE_MASK GENMASK(3,0) /* 4-bits (3:0) = mask 0x0F */ + +/* igc_rss_type - Rx descriptor RSS type field */ +static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc) +{ + /* RSS Type 4-bits (3:0) number: 0-9 (above 9 is reserved) + * Accessing the same bits via u16 (wb.lower.lo_dword.hs_rss.pkt_info) + * is slightly slower than via u32 (wb.lower.lo_dword.data) + */ + return le32_get_bits(rx_desc->wb.lower.lo_dword.data, IGC_RSS_TYPE_MASK); +} + /* Interrupt defines */ #define IGC_START_ITR 648 /* ~6000 ints/sec */ #define IGC_4K_ITR 980 @@ -326,11 +368,11 @@ extern char igc_driver_name[]; /* TX/RX descriptor defines */ #define IGC_DEFAULT_TXD 256 #define IGC_DEFAULT_TX_WORK 128 -#define IGC_MIN_TXD 80 +#define IGC_MIN_TXD 64 #define IGC_MAX_TXD 4096 #define IGC_DEFAULT_RXD 256 -#define IGC_MIN_RXD 80 +#define IGC_MIN_RXD 64 #define IGC_MAX_RXD 4096 /* Supported Rx Buffer Sizes */ @@ -401,7 +443,6 @@ enum igc_state_t { __IGC_TESTING, __IGC_RESETTING, __IGC_DOWN, - __IGC_PTP_TX_IN_PROGRESS, }; enum igc_tx_flags { @@ -471,6 +512,13 @@ struct igc_rx_buffer { }; }; +/* context wrapper around xdp_buff to provide access to descriptor metadata */ +struct igc_xdp_buff { + struct xdp_buff xdp; + union igc_adv_rx_desc *rx_desc; + ktime_t rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */ +}; + struct igc_q_vector { struct igc_adapter *adapter; /* backlink */ void __iomem *itr_register; @@ -578,6 +626,7 @@ enum igc_ring_flags_t { IGC_RING_FLAG_TX_CTX_IDX, IGC_RING_FLAG_TX_DETECT_HANG, IGC_RING_FLAG_AF_XDP_ZC, + IGC_RING_FLAG_TX_HWTSTAMP, }; #define ring_uses_large_buffer(ring) \ @@ -634,6 +683,7 @@ int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); void igc_ptp_tx_hang(struct igc_adapter *adapter); void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts); +void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter); #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 44a507029946..2f780cc90883 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -546,7 +546,7 @@ #define IGC_PTM_CTRL_START_NOW BIT(29) /* Start PTM Now */ #define IGC_PTM_CTRL_EN BIT(30) /* Enable PTM */ #define IGC_PTM_CTRL_TRIG BIT(31) /* PTM Cycle trigger */ -#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x2f) << 2) +#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2) #define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8) #define IGC_PTM_SHORT_CYC_DEFAULT 10 /* Default Short/interrupted cycle interval */ diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 0e2cb00622d1..93bce729be76 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1708,6 +1708,8 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, /* twisted pair */ cmd->base.port = PORT_TP; cmd->base.phy_address = hw->phy.addr; + ethtool_link_ksettings_add_link_mode(cmd, supported, TP); + ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); /* advertising link modes */ if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index fa764190f270..6f557e843e49 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -316,6 +316,33 @@ static void igc_clean_all_tx_rings(struct igc_adapter *adapter) igc_clean_tx_ring(adapter->tx_ring[i]); } +static void igc_disable_tx_ring_hw(struct igc_ring *ring) +{ + struct igc_hw *hw = &ring->q_vector->adapter->hw; + u8 idx = ring->reg_idx; + u32 txdctl; + + txdctl = rd32(IGC_TXDCTL(idx)); + txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; + txdctl |= IGC_TXDCTL_SWFLUSH; + wr32(IGC_TXDCTL(idx), txdctl); +} + +/** + * igc_disable_all_tx_rings_hw - Disable all transmit queue operation + * @adapter: board private structure + */ +static void igc_disable_all_tx_rings_hw(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + igc_disable_tx_ring_hw(tx_ring); + } +} + /** * igc_setup_tx_resources - allocate Tx resources (Descriptors) * @tx_ring: tx descriptor ring (for a specific queue) to setup @@ -711,7 +738,6 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter, /* disable the queue */ wr32(IGC_TXDCTL(reg_idx), 0); wrfl(); - mdelay(10); wr32(IGC_TDLEN(reg_idx), ring->count * sizeof(union igc_adv_tx_desc)); @@ -1017,7 +1043,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, ktime_t base_time = adapter->base_time; ktime_t now = ktime_get_clocktai(); ktime_t baset_est, end_of_cycle; - u32 launchtime; + s32 launchtime; s64 n; n = div64_s64(ktime_sub_ns(now, base_time), cycle_time); @@ -1030,7 +1056,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, *first_flag = true; ring->last_ff_cycle = baset_est; - if (ktime_compare(txtime, ring->last_tx_cycle) > 0) + if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0) *insert_empty = true; } } @@ -1573,26 +1599,24 @@ done: first->bytecount = skb->len; first->gso_segs = 1; - if (tx_ring->max_sdu > 0) { - u32 max_sdu = 0; - - max_sdu = tx_ring->max_sdu + - (skb_vlan_tagged(first->skb) ? VLAN_HLEN : 0); + if (adapter->qbv_transition || tx_ring->oper_gate_closed) + goto out_drop; - if (first->bytecount > max_sdu) { - adapter->stats.txdrop++; - goto out_drop; - } + if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) { + adapter->stats.txdrop++; + goto out_drop; } - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && + skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { /* FIXME: add support for retrieving timestamps from * the other timer registers before skipping the * timestamping request. */ - if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && - !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS, - &adapter->state)) { + unsigned long flags; + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + if (!adapter->ptp_tx_skb) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGC_TX_FLAGS_TSTAMP; @@ -1601,6 +1625,8 @@ done: } else { adapter->tx_hwtstamp_skipped++; } + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } if (skb_vlan_tag_present(skb)) { @@ -1697,14 +1723,36 @@ static void igc_rx_checksum(struct igc_ring *ring, le32_to_cpu(rx_desc->wb.upper.status_error)); } +/* Mapping HW RSS Type to enum pkt_hash_types */ +static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = { + [IGC_RSS_TYPE_NO_HASH] = PKT_HASH_TYPE_L2, + [IGC_RSS_TYPE_HASH_TCP_IPV4] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_IPV4] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_TCP_IPV6] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_IPV6_EX] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_IPV6] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV4] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV6] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4, + [10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ + [11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask */ + [12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons */ + [13] = PKT_HASH_TYPE_NONE, + [14] = PKT_HASH_TYPE_NONE, + [15] = PKT_HASH_TYPE_NONE, +}; + static inline void igc_rx_hash(struct igc_ring *ring, union igc_adv_rx_desc *rx_desc, struct sk_buff *skb) { - if (ring->netdev->features & NETIF_F_RXHASH) - skb_set_hash(skb, - le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), - PKT_HASH_TYPE_L3); + if (ring->netdev->features & NETIF_F_RXHASH) { + u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); + u32 rss_type = igc_rss_type(rx_desc); + + skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]); + } } static void igc_rx_vlan(struct igc_ring *rx_ring, @@ -2221,6 +2269,8 @@ static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) if (!count) return ok; + XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff); + desc = IGC_RX_DESC(ring, i); bi = &ring->rx_buffer_info[i]; i -= ring->count; @@ -2394,6 +2444,8 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) nq = txring_txq(ring); __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); res = igc_xdp_init_tx_descriptor(ring, xdpf); __netif_tx_unlock(nq); return res; @@ -2505,8 +2557,8 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) union igc_adv_rx_desc *rx_desc; struct igc_rx_buffer *rx_buffer; unsigned int size, truesize; + struct igc_xdp_buff ctx; ktime_t timestamp = 0; - struct xdp_buff xdp; int pkt_offset = 0; void *pktbuf; @@ -2535,18 +2587,20 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, pktbuf); + ctx.rx_ts = timestamp; pkt_offset = IGC_TS_HDR_LEN; size -= IGC_TS_HDR_LEN; } if (!skb) { - xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq); - xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring), + xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq); + xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring), igc_rx_offset(rx_ring) + pkt_offset, size, true); - xdp_buff_clear_frags_flag(&xdp); + xdp_buff_clear_frags_flag(&ctx.xdp); + ctx.rx_desc = rx_desc; - skb = igc_xdp_run_prog(adapter, &xdp); + skb = igc_xdp_run_prog(adapter, &ctx.xdp); } if (IS_ERR(skb)) { @@ -2568,9 +2622,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) } else if (skb) igc_add_rx_frag(rx_ring, rx_buffer, skb, size); else if (ring_uses_build_skb(rx_ring)) - skb = igc_build_skb(rx_ring, rx_buffer, &xdp); + skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp); else - skb = igc_construct_skb(rx_ring, rx_buffer, &xdp, + skb = igc_construct_skb(rx_ring, rx_buffer, &ctx.xdp, timestamp); /* exit if we failed to retrieve a buffer */ @@ -2671,6 +2725,15 @@ static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, napi_gro_receive(&q_vector->napi, skb); } +static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp) +{ + /* xdp_buff pointer used by ZC code path is alloc as xdp_buff_xsk. The + * igc_xdp_buff shares its layout with xdp_buff_xsk and private + * igc_xdp_buff fields fall into xdp_buff_xsk->cb + */ + return (struct igc_xdp_buff *)xdp; +} + static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) { struct igc_adapter *adapter = q_vector->adapter; @@ -2689,6 +2752,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) while (likely(total_packets < budget)) { union igc_adv_rx_desc *desc; struct igc_rx_buffer *bi; + struct igc_xdp_buff *ctx; ktime_t timestamp = 0; unsigned int size; int res; @@ -2706,9 +2770,13 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) bi = &ring->rx_buffer_info[ntc]; + ctx = xsk_buff_to_igc_ctx(bi->xdp); + ctx->rx_desc = desc; + if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, bi->xdp->data); + ctx->rx_ts = timestamp; bi->xdp->data += IGC_TS_HDR_LEN; @@ -2787,15 +2855,18 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) struct netdev_queue *nq = txring_txq(ring); union igc_adv_tx_desc *tx_desc = NULL; int cpu = smp_processor_id(); - u16 ntu = ring->next_to_use; struct xdp_desc xdp_desc; - u16 budget; + u16 budget, ntu; if (!netif_carrier_ok(ring->netdev)) return; __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); + + ntu = ring->next_to_use; budget = igc_desc_unused(ring); while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) { @@ -2963,8 +3034,8 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) time_after(jiffies, tx_buffer->time_stamp + (adapter->tx_timeout_factor * HZ)) && !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) && - (rd32(IGC_TDH(tx_ring->reg_idx)) != - readl(tx_ring->tail))) { + (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) && + !tx_ring->oper_gate_closed) { /* detected Tx unit hang */ netdev_err(tx_ring->netdev, "Detected Tx Unit Hang\n" @@ -4730,6 +4801,7 @@ static int igc_sw_init(struct igc_adapter *adapter) adapter->nfc_rule_count = 0; spin_lock_init(&adapter->stats64_lock); + spin_lock_init(&adapter->qbv_tx_lock); /* Assume MSI-X interrupts, will be checked during IRQ allocation */ adapter->flags |= IGC_FLAG_HAS_MSIX; @@ -5014,6 +5086,7 @@ void igc_down(struct igc_adapter *adapter) /* clear VLAN promisc flag so VFTA will be updated if necessary */ adapter->flags &= ~IGC_FLAG_VLAN_PROMISC; + igc_disable_all_tx_rings_hw(adapter); igc_clean_all_tx_rings(adapter); igc_clean_all_rx_rings(adapter); } @@ -5219,7 +5292,7 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) if (tsicr & IGC_TSICR_TXTS) { /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); + igc_ptp_tx_tstamp_event(adapter); ack |= IGC_TSICR_TXTS; } @@ -6047,13 +6120,16 @@ static int igc_tsn_enable_launchtime(struct igc_adapter *adapter, return igc_tsn_offload_apply(adapter); } -static int igc_tsn_clear_schedule(struct igc_adapter *adapter) +static int igc_qbv_clear_schedule(struct igc_adapter *adapter) { + unsigned long flags; int i; adapter->base_time = 0; adapter->cycle_time = NSEC_PER_SEC; + adapter->taprio_offload_enable = false; adapter->qbv_config_change_errors = 0; + adapter->qbv_count = 0; for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; @@ -6063,6 +6139,26 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) ring->max_sdu = 0; } + spin_lock_irqsave(&adapter->qbv_tx_lock, flags); + + adapter->qbv_transition = false; + + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + + ring->oper_gate_closed = false; + ring->admin_gate_closed = false; + } + + spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); + + return 0; +} + +static int igc_tsn_clear_schedule(struct igc_adapter *adapter) +{ + igc_qbv_clear_schedule(adapter); + return 0; } @@ -6072,18 +6168,21 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, bool queue_configured[IGC_MAX_TX_QUEUES] = { }; struct igc_hw *hw = &adapter->hw; u32 start_time = 0, end_time = 0; + struct timespec64 now; + unsigned long flags; size_t n; int i; - adapter->qbv_enable = qopt->enable; - - if (!qopt->enable) + if (qopt->cmd == TAPRIO_CMD_DESTROY) return igc_tsn_clear_schedule(adapter); + if (qopt->cmd != TAPRIO_CMD_REPLACE) + return -EOPNOTSUPP; + if (qopt->base_time < 0) return -ERANGE; - if (igc_is_device_id_i225(hw) && adapter->base_time) + if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable) return -EALREADY; if (!validate_schedule(adapter, qopt)) @@ -6091,6 +6190,9 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, adapter->cycle_time = qopt->cycle_time; adapter->base_time = qopt->base_time; + adapter->taprio_offload_enable = true; + + igc_ptp_read(adapter, &now); for (n = 0; n < qopt->num_entries; n++) { struct tc_taprio_sched_entry *e = &qopt->entries[n]; @@ -6126,30 +6228,49 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, ring->start_time = start_time; ring->end_time = end_time; - queue_configured[i] = true; + if (ring->start_time >= adapter->cycle_time) + queue_configured[i] = false; + else + queue_configured[i] = true; } start_time += e->interval; } + spin_lock_irqsave(&adapter->qbv_tx_lock, flags); + /* Check whether a queue gets configured. * If not, set the start and end time to be end time. */ for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + + if (!is_base_time_past(qopt->base_time, &now)) { + ring->admin_gate_closed = false; + } else { + ring->oper_gate_closed = false; + ring->admin_gate_closed = false; + } + if (!queue_configured[i]) { - struct igc_ring *ring = adapter->tx_ring[i]; + if (!is_base_time_past(qopt->base_time, &now)) + ring->admin_gate_closed = true; + else + ring->oper_gate_closed = true; ring->start_time = end_time; ring->end_time = end_time; } } + spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); + for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; struct net_device *dev = adapter->netdev; if (qopt->max_sdu[i]) - ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len; + ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN; else ring->max_sdu = 0; } @@ -6269,6 +6390,8 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, { struct igc_adapter *adapter = netdev_priv(dev); + adapter->tc_setup_type = type; + switch (type) { case TC_QUERY_CAPS: return igc_tc_query_caps(adapter, type_data); @@ -6321,6 +6444,9 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); + drops = 0; for (i = 0; i < num_frames; i++) { int err; @@ -6461,6 +6587,85 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) return value; } +/* Mapping HW RSS Type to enum xdp_rss_hash_type */ +static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = { + [IGC_RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_L2, + [IGC_RSS_TYPE_HASH_TCP_IPV4] = XDP_RSS_TYPE_L4_IPV4_TCP, + [IGC_RSS_TYPE_HASH_IPV4] = XDP_RSS_TYPE_L3_IPV4, + [IGC_RSS_TYPE_HASH_TCP_IPV6] = XDP_RSS_TYPE_L4_IPV6_TCP, + [IGC_RSS_TYPE_HASH_IPV6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, + [IGC_RSS_TYPE_HASH_IPV6] = XDP_RSS_TYPE_L3_IPV6, + [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, + [IGC_RSS_TYPE_HASH_UDP_IPV4] = XDP_RSS_TYPE_L4_IPV4_UDP, + [IGC_RSS_TYPE_HASH_UDP_IPV6] = XDP_RSS_TYPE_L4_IPV6_UDP, + [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX, + [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ + [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask */ + [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisons */ + [13] = XDP_RSS_TYPE_NONE, + [14] = XDP_RSS_TYPE_NONE, + [15] = XDP_RSS_TYPE_NONE, +}; + +static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) +{ + const struct igc_xdp_buff *ctx = (void *)_ctx; + + if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)) + return -ENODATA; + + *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss); + *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)]; + + return 0; +} + +static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp) +{ + const struct igc_xdp_buff *ctx = (void *)_ctx; + + if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) { + *timestamp = ctx->rx_ts; + + return 0; + } + + return -ENODATA; +} + +static const struct xdp_metadata_ops igc_xdp_metadata_ops = { + .xmo_rx_hash = igc_xdp_rx_hash, + .xmo_rx_timestamp = igc_xdp_rx_timestamp, +}; + +static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer) +{ + struct igc_adapter *adapter = container_of(timer, struct igc_adapter, + hrtimer); + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&adapter->qbv_tx_lock, flags); + + adapter->qbv_transition = true; + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + if (tx_ring->admin_gate_closed) { + tx_ring->admin_gate_closed = false; + tx_ring->oper_gate_closed = true; + } else { + tx_ring->oper_gate_closed = false; + } + } + adapter->qbv_transition = false; + + spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); + + return HRTIMER_NORESTART; +} + /** * igc_probe - Device Initialization Routine * @pdev: PCI device information struct @@ -6534,6 +6739,7 @@ static int igc_probe(struct pci_dev *pdev, hw->hw_addr = adapter->io_addr; netdev->netdev_ops = &igc_netdev_ops; + netdev->xdp_metadata_ops = &igc_xdp_metadata_ops; igc_ethtool_set_ops(netdev); netdev->watchdog_timeo = 5 * HZ; @@ -6561,6 +6767,7 @@ static int igc_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_TSO; netdev->features |= NETIF_F_TSO6; netdev->features |= NETIF_F_TSO_ECN; + netdev->features |= NETIF_F_RXHASH; netdev->features |= NETIF_F_RXCSUM; netdev->features |= NETIF_F_HW_CSUM; netdev->features |= NETIF_F_SCTP_CRC; @@ -6637,6 +6844,9 @@ static int igc_probe(struct pci_dev *pdev, INIT_WORK(&adapter->reset_task, igc_reset_task); INIT_WORK(&adapter->watchdog_task, igc_watchdog_task); + hrtimer_init(&adapter->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + adapter->hrtimer.function = &igc_qbv_scheduling_timer; + /* Initialize link properties that are user-changeable */ adapter->fc_autoneg = true; hw->mac.autoneg = true; @@ -6740,6 +6950,7 @@ static void igc_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->reset_task); cancel_work_sync(&adapter->watchdog_task); + hrtimer_cancel(&adapter->hrtimer); /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. @@ -7137,18 +7348,6 @@ void igc_enable_rx_ring(struct igc_ring *ring) igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); } -static void igc_disable_tx_ring_hw(struct igc_ring *ring) -{ - struct igc_hw *hw = &ring->q_vector->adapter->hw; - u8 idx = ring->reg_idx; - u32 txdctl; - - txdctl = rd32(IGC_TXDCTL(idx)); - txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; - txdctl |= IGC_TXDCTL_SWFLUSH; - wr32(IGC_TXDCTL(idx), txdctl); -} - void igc_disable_tx_ring(struct igc_ring *ring) { igc_disable_tx_ring_hw(ring); diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 4e10ced736db..f0b979a70655 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -356,16 +356,35 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, tsim &= ~IGC_TSICR_TT0; } if (on) { + struct timespec64 safe_start; int i = rq->perout.index; igc_pin_perout(igc, i, pin, use_freq); - igc->perout[i].start.tv_sec = rq->perout.start.sec; + igc_ptp_read(igc, &safe_start); + + /* PPS output start time is triggered by Target time(TT) + * register. Programming any past time value into TT + * register will cause PPS to never start. Need to make + * sure we program the TT register a time ahead in + * future. There isn't a stringent need to fire PPS out + * right away. Adding +2 seconds should take care of + * corner cases. Let's say if the SYSTIML is close to + * wrap up and the timer keeps ticking as we program the + * register, adding +2seconds is safe bet. + */ + safe_start.tv_sec += 2; + + if (rq->perout.start.sec < safe_start.tv_sec) + igc->perout[i].start.tv_sec = safe_start.tv_sec; + else + igc->perout[i].start.tv_sec = rq->perout.start.sec; igc->perout[i].start.tv_nsec = rq->perout.start.nsec; igc->perout[i].period.tv_sec = ts.tv_sec; igc->perout[i].period.tv_nsec = ts.tv_nsec; - wr32(trgttimh, rq->perout.start.sec); + wr32(trgttimh, (u32)igc->perout[i].start.tv_sec); /* For now, always select timer 0 as source. */ - wr32(trgttiml, rq->perout.start.nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); + wr32(trgttiml, (u32)(igc->perout[i].start.tv_nsec | + IGC_TT_IO_TIMER_SEL_SYSTIM0)); if (use_freq) wr32(freqout, ns); tsauxc |= tsauxc_mask; @@ -536,9 +555,34 @@ static void igc_ptp_enable_rx_timestamp(struct igc_adapter *adapter) wr32(IGC_TSYNCRXCTL, val); } +static void igc_ptp_clear_tx_tstamp(struct igc_adapter *adapter) +{ + unsigned long flags; + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); +} + static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + int i; + + /* Clear the flags first to avoid new packets to be enqueued + * for TX timestamping. + */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + clear_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags); + } + + /* Now we can clean the pending TX timestamp requests. */ + igc_ptp_clear_tx_tstamp(adapter); wr32(IGC_TSYNCTXCTL, 0); } @@ -546,12 +590,23 @@ static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter) static void igc_ptp_enable_tx_timestamp(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + int i; wr32(IGC_TSYNCTXCTL, IGC_TSYNCTXCTL_ENABLED | IGC_TSYNCTXCTL_TXSYNSIG); /* Read TXSTMP registers to discard any timestamp previously stored. */ rd32(IGC_TXSTMPL); rd32(IGC_TXSTMPH); + + /* The hardware is ready to accept TX timestamp requests, + * notify the transmit path. + */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + set_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags); + } + } /** @@ -603,6 +658,7 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, return 0; } +/* Requires adapter->ptp_tx_lock held by caller. */ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; @@ -610,7 +666,6 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) dev_kfree_skb_any(adapter->ptp_tx_skb); adapter->ptp_tx_skb = NULL; adapter->tx_hwtstamp_timeouts++; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); /* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. */ rd32(IGC_TXSTMPH); netdev_warn(adapter->netdev, "Tx timestamp timeout\n"); @@ -618,20 +673,20 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) void igc_ptp_tx_hang(struct igc_adapter *adapter) { - bool timeout = time_is_before_jiffies(adapter->ptp_tx_start + - IGC_PTP_TX_TIMEOUT); + unsigned long flags; - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); - /* If we haven't received a timestamp within the timeout, it is - * reasonable to assume that it will never occur, so we can unlock the - * timestamp bit when this occurs. - */ - if (timeout) { - cancel_work_sync(&adapter->ptp_tx_work); - igc_ptp_tx_timeout(adapter); - } + if (!adapter->ptp_tx_skb) + goto unlock; + + if (time_is_after_jiffies(adapter->ptp_tx_start + IGC_PTP_TX_TIMEOUT)) + goto unlock; + + igc_ptp_tx_timeout(adapter); + +unlock: + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } /** @@ -641,20 +696,57 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter) * If we were asked to do hardware stamping and such a time stamp is * available, then it must have been for this skb here because we only * allow only one such packet into the queue. + * + * Context: Expects adapter->ptp_tx_lock to be held by caller. */ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) { struct sk_buff *skb = adapter->ptp_tx_skb; struct skb_shared_hwtstamps shhwtstamps; struct igc_hw *hw = &adapter->hw; + u32 tsynctxctl; int adjust = 0; u64 regval; if (WARN_ON_ONCE(!skb)) return; - regval = rd32(IGC_TXSTMPL); - regval |= (u64)rd32(IGC_TXSTMPH) << 32; + tsynctxctl = rd32(IGC_TSYNCTXCTL); + tsynctxctl &= IGC_TSYNCTXCTL_TXTT_0; + if (tsynctxctl) { + regval = rd32(IGC_TXSTMPL); + regval |= (u64)rd32(IGC_TXSTMPH) << 32; + } else { + /* There's a bug in the hardware that could cause + * missing interrupts for TX timestamping. The issue + * is that for new interrupts to be triggered, the + * IGC_TXSTMPH_0 register must be read. + * + * To avoid discarding a valid timestamp that just + * happened at the "wrong" time, we need to confirm + * that there was no timestamp captured, we do that by + * assuming that no two timestamps in sequence have + * the same nanosecond value. + * + * So, we read the "low" register, read the "high" + * register (to latch a new timestamp) and read the + * "low" register again, if "old" and "new" versions + * of the "low" register are different, a valid + * timestamp was captured, we can read the "high" + * register again. + */ + u32 txstmpl_old, txstmpl_new; + + txstmpl_old = rd32(IGC_TXSTMPL); + rd32(IGC_TXSTMPH); + txstmpl_new = rd32(IGC_TXSTMPL); + + if (txstmpl_old == txstmpl_new) + return; + + regval = txstmpl_new; + regval |= (u64)rd32(IGC_TXSTMPH) << 32; + } if (igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval)) return; @@ -676,13 +768,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) shhwtstamps.hwtstamp = ktime_add_ns(shhwtstamps.hwtstamp, adjust); - /* Clear the lock early before calling skb_tstamp_tx so that - * applications are not woken up before the lock bit is clear. We use - * a copy of the skb pointer to ensure other threads can't change it - * while we're notifying the stack. - */ adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); /* Notify the stack and free the skb after we've unlocked */ skb_tstamp_tx(skb, &shhwtstamps); @@ -690,27 +776,25 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) } /** - * igc_ptp_tx_work - * @work: pointer to work struct + * igc_ptp_tx_tstamp_event + * @adapter: board private structure * - * This work function polls the TSYNCTXCTL valid bit to determine when a - * timestamp has been taken for the current stored skb. + * Called when a TX timestamp interrupt happens to retrieve the + * timestamp and send it up to the socket. */ -static void igc_ptp_tx_work(struct work_struct *work) +void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter) { - struct igc_adapter *adapter = container_of(work, struct igc_adapter, - ptp_tx_work); - struct igc_hw *hw = &adapter->hw; - u32 tsynctxctl; + unsigned long flags; - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); - tsynctxctl = rd32(IGC_TSYNCTXCTL); - if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0))) - return; + if (!adapter->ptp_tx_skb) + goto unlock; igc_ptp_tx_hwtstamp(adapter); + +unlock: + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } /** @@ -959,8 +1043,8 @@ void igc_ptp_init(struct igc_adapter *adapter) return; } + spin_lock_init(&adapter->ptp_tx_lock); spin_lock_init(&adapter->tmreg_lock); - INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work); adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; @@ -1020,10 +1104,7 @@ void igc_ptp_suspend(struct igc_adapter *adapter) if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) return; - cancel_work_sync(&adapter->ptp_tx_work); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + igc_ptp_clear_tx_tstamp(adapter); if (pci_device_is_present(adapter->pdev)) { igc_ptp_time_save(adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 94a2b0dfb54d..a9c08321aca9 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -37,7 +37,7 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) { unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED; - if (adapter->qbv_enable) + if (adapter->taprio_offload_enable) new_flags |= IGC_FLAG_TSN_QBV_ENABLED; if (is_any_launchtime(adapter)) @@ -114,7 +114,6 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - bool tsn_mode_reconfig = false; u32 tqavctrl, baset_l, baset_h; u32 sec, nsec, cycle; ktime_t base_time, systim; @@ -133,8 +132,28 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) wr32(IGC_STQT(i), ring->start_time); wr32(IGC_ENDQT(i), ring->end_time); - txqctl |= IGC_TXQCTL_STRICT_CYCLE | - IGC_TXQCTL_STRICT_END; + if (adapter->taprio_offload_enable) { + /* If taprio_offload_enable is set we are in "taprio" + * mode and we need to be strict about the + * cycles: only transmit a packet if it can be + * completed during that cycle. + * + * If taprio_offload_enable is NOT true when + * enabling TSN offload, the cycle should have + * no external effects, but is only used internally + * to adapt the base time register after a second + * has passed. + * + * Enabling strict mode in this case would + * unnecessarily prevent the transmission of + * certain packets (i.e. at the boundary of a + * second) and thus interfere with the launchtime + * feature that promises transmission at a + * certain point in time. + */ + txqctl |= IGC_TXQCTL_STRICT_CYCLE | + IGC_TXQCTL_STRICT_END; + } if (ring->launchtime_enable) txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT; @@ -228,11 +247,10 @@ skip_cbs: tqavctrl = rd32(IGC_TQAVCTRL) & ~IGC_TQAVCTRL_FUTSCDDIS; - if (tqavctrl & IGC_TQAVCTRL_TRANSMIT_MODE_TSN) - tsn_mode_reconfig = true; - tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV; + adapter->qbv_count++; + cycle = adapter->cycle_time; base_time = adapter->base_time; @@ -249,17 +267,29 @@ skip_cbs: * Gate Control List (GCL) is running. */ if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && - tsn_mode_reconfig) + (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && + (adapter->qbv_count > 1)) adapter->qbv_config_change_errors++; } else { - /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit - * has to be configured before the cycle time and base time. - * Tx won't hang if there is a GCL is already running, - * so in this case we don't need to set FutScdDis. - */ - if (igc_is_device_id_i226(hw) && - !(rd32(IGC_BASET_H) || rd32(IGC_BASET_L))) - tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS; + if (igc_is_device_id_i226(hw)) { + ktime_t adjust_time, expires_time; + + /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit + * has to be configured before the cycle time and base time. + * Tx won't hang if a GCL is already running, + * so in this case we don't need to set FutScdDis. + */ + if (!(rd32(IGC_BASET_H) || rd32(IGC_BASET_L))) + tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS; + + nsec = rd32(IGC_SYSTIML); + sec = rd32(IGC_SYSTIMH); + systim = ktime_set(sec, nsec); + + adjust_time = adapter->base_time; + expires_time = ktime_sub_ns(adjust_time, systim); + hrtimer_start(&adapter->hrtimer, expires_time, HRTIMER_MODE_REL); + } } wr32(IGC_TQAVCTRL, tqavctrl); @@ -305,7 +335,11 @@ int igc_tsn_offload_apply(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - if (netif_running(adapter->netdev) && igc_is_device_id_i225(hw)) { + /* Per I225/6 HW Design Section 7.5.2.1, transmit mode + * cannot be changed dynamically. Require reset the adapter. + */ + if (netif_running(adapter->netdev) && + (igc_is_device_id_i225(hw) || !adapter->qbv_count)) { schedule_work(&adapter->reset_task); return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 1726297f2e0d..8eb9839a3ca6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8479,7 +8479,7 @@ static void ixgbe_atr(struct ixgbe_ring *ring, struct ixgbe_adapter *adapter = q_vector->adapter; if (unlikely(skb_tail_pointer(skb) < hdr.network + - VXLAN_HEADROOM)) + vxlan_headroom(0))) return; /* verify the port is recognized as VXLAN */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 0310af851086..9339edbd9082 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -979,6 +979,7 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, u32 tsync_tx_ctl = IXGBE_TSYNCTXCTL_ENABLED; u32 tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED; u32 tsync_rx_mtrl = PTP_EV_PORT << 16; + u32 aflags = adapter->flags; bool is_l2 = false; u32 regval; @@ -996,20 +997,20 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, case HWTSTAMP_FILTER_NONE: tsync_rx_ctl = 0; tsync_rx_mtrl = 0; - adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | - IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); + aflags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; tsync_rx_mtrl |= IXGBE_RXMTRL_V1_SYNC_MSG; - adapter->flags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED | - IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); + aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; tsync_rx_mtrl |= IXGBE_RXMTRL_V1_DELAY_REQ_MSG; - adapter->flags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED | - IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); + aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: @@ -1023,8 +1024,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2; is_l2 = true; config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - adapter->flags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED | - IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); + aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: case HWTSTAMP_FILTER_NTP_ALL: @@ -1035,7 +1036,7 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, if (hw->mac.type >= ixgbe_mac_X550) { tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_ALL; config->rx_filter = HWTSTAMP_FILTER_ALL; - adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; + aflags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; break; } fallthrough; @@ -1046,8 +1047,6 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, * Delay_Req messages and hardware does not support * timestamping all packets => return error */ - adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | - IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); config->rx_filter = HWTSTAMP_FILTER_NONE; return -ERANGE; } @@ -1079,8 +1078,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, IXGBE_TSYNCRXCTL_TYPE_ALL | IXGBE_TSYNCRXCTL_TSIP_UT_EN; config->rx_filter = HWTSTAMP_FILTER_ALL; - adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; - adapter->flags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER; + aflags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; + aflags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER; is_l2 = true; break; default: @@ -1113,6 +1112,9 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, IXGBE_WRITE_FLUSH(hw); + /* configure adapter flags only when HW is actually configured */ + adapter->flags = aflags; + /* clear TX/RX time stamp registers, just to be sure */ ixgbe_ptp_clear_tx_timestamp(adapter); IXGBE_READ_REG(hw, IXGBE_RXSTMPH); diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 2b9335cb4bb3..8537578e1cf1 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -1302,11 +1302,10 @@ static int korina_probe(struct platform_device *pdev) else if (of_get_ethdev_address(pdev->dev.of_node, dev) < 0) eth_hw_addr_random(dev); - clk = devm_clk_get_optional(&pdev->dev, "mdioclk"); + clk = devm_clk_get_optional_enabled(&pdev->dev, "mdioclk"); if (IS_ERR(clk)) return PTR_ERR(clk); if (clk) { - clk_prepare_enable(clk); lp->mii_clock_freq = clk_get_rate(clk); } else { lp->mii_clock_freq = 200000000; /* max possible input clk */ diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c index 35f24e0f0934..ffa96059079c 100644 --- a/drivers/net/ethernet/litex/litex_liteeth.c +++ b/drivers/net/ethernet/litex/litex_liteeth.c @@ -78,8 +78,7 @@ static int liteeth_rx(struct net_device *netdev) memcpy_fromio(data, priv->rx_base + rx_slot * priv->slot_size, len); skb->protocol = eth_type_trans(skb, netdev); - netdev->stats.rx_packets++; - netdev->stats.rx_bytes += len; + dev_sw_netstats_rx_add(netdev, len); return netif_rx(skb); @@ -185,8 +184,7 @@ static netdev_tx_t liteeth_start_xmit(struct sk_buff *skb, litex_write16(priv->base + LITEETH_READER_LENGTH, skb->len); litex_write8(priv->base + LITEETH_READER_START, 1); - netdev->stats.tx_bytes += skb->len; - netdev->stats.tx_packets++; + dev_sw_netstats_tx_add(netdev, 1, skb->len); priv->tx_slot = (priv->tx_slot + 1) % priv->num_tx_slots; dev_kfree_skb_any(skb); @@ -194,9 +192,17 @@ static netdev_tx_t liteeth_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } +static void +liteeth_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) +{ + netdev_stats_to_stats64(stats, &netdev->stats); + dev_fetch_sw_netstats(stats, netdev->tstats); +} + static const struct net_device_ops liteeth_netdev_ops = { .ndo_open = liteeth_open, .ndo_stop = liteeth_stop, + .ndo_get_stats64 = liteeth_get_stats64, .ndo_start_xmit = liteeth_start_xmit, }; @@ -242,6 +248,11 @@ static int liteeth_probe(struct platform_device *pdev) priv->netdev = netdev; priv->dev = &pdev->dev; + netdev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev, + struct pcpu_sw_netstats); + if (!netdev->tstats) + return -ENOMEM; + irq = platform_get_irq(pdev, 0); if (irq < 0) return irq; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 2cad76d0a50e..acf4f6ba73a6 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -344,6 +344,15 @@ #define MVNETA_MAX_SKB_DESCS (MVNETA_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS) +/* The size of a TSO header page */ +#define MVNETA_TSO_PAGE_SIZE (2 * PAGE_SIZE) + +/* Number of TSO headers per page. This should be a power of 2 */ +#define MVNETA_TSO_PER_PAGE (MVNETA_TSO_PAGE_SIZE / TSO_HEADER_SIZE) + +/* Maximum number of TSO header pages */ +#define MVNETA_MAX_TSO_PAGES (MVNETA_MAX_TXD / MVNETA_TSO_PER_PAGE) + /* descriptor aligned size */ #define MVNETA_DESC_ALIGNED_SIZE 32 @@ -364,10 +373,6 @@ MVNETA_SKB_HEADROOM)) #define MVNETA_MAX_RX_BUF_SIZE (PAGE_SIZE - MVNETA_SKB_PAD) -#define IS_TSO_HEADER(txq, addr) \ - ((addr >= txq->tso_hdrs_phys) && \ - (addr < txq->tso_hdrs_phys + txq->size * TSO_HEADER_SIZE)) - #define MVNETA_RX_GET_BM_POOL_ID(rxd) \ (((rxd)->status & MVNETA_RXD_BM_POOL_MASK) >> MVNETA_RXD_BM_POOL_SHIFT) @@ -638,6 +643,7 @@ struct mvneta_rx_desc { #endif enum mvneta_tx_buf_type { + MVNETA_TYPE_TSO, MVNETA_TYPE_SKB, MVNETA_TYPE_XDP_TX, MVNETA_TYPE_XDP_NDO, @@ -690,10 +696,10 @@ struct mvneta_tx_queue { int next_desc_to_proc; /* DMA buffers for TSO headers */ - char *tso_hdrs; + char *tso_hdrs[MVNETA_MAX_TSO_PAGES]; /* DMA address of TSO headers */ - dma_addr_t tso_hdrs_phys; + dma_addr_t tso_hdrs_phys[MVNETA_MAX_TSO_PAGES]; /* Affinity mask for CPUs*/ cpumask_t affinity_mask; @@ -1505,7 +1511,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) */ if (txq_number == 1) txq_map = (cpu == pp->rxq_def) ? - MVNETA_CPU_TXQ_ACCESS(1) : 0; + MVNETA_CPU_TXQ_ACCESS(0) : 0; } else { txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK; @@ -1878,12 +1884,13 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp, mvneta_txq_inc_get(txq); - if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr) && - buf->type != MVNETA_TYPE_XDP_TX) + if (buf->type == MVNETA_TYPE_XDP_NDO || + buf->type == MVNETA_TYPE_SKB) dma_unmap_single(pp->dev->dev.parent, tx_desc->buf_phys_addr, tx_desc->data_size, DMA_TO_DEVICE); - if (buf->type == MVNETA_TYPE_SKB && buf->skb) { + if ((buf->type == MVNETA_TYPE_TSO || + buf->type == MVNETA_TYPE_SKB) && buf->skb) { bytes_compl += buf->skb->len; pkts_compl++; dev_kfree_skb_any(buf->skb); @@ -2369,9 +2376,8 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) { skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++]; - skb_frag_off_set(frag, pp->rx_offset_correction); - skb_frag_size_set(frag, data_len); - __skb_frag_set_page(frag, page); + skb_frag_fill_page_desc(frag, page, + pp->rx_offset_correction, data_len); if (!xdp_buff_has_frags(xdp)) { sinfo->xdp_frags_size = *size; @@ -2661,20 +2667,72 @@ err_drop_frame: return rx_done; } -static inline void -mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq) +static void mvneta_free_tso_hdrs(struct mvneta_port *pp, + struct mvneta_tx_queue *txq) +{ + struct device *dev = pp->dev->dev.parent; + int i; + + for (i = 0; i < MVNETA_MAX_TSO_PAGES; i++) { + if (txq->tso_hdrs[i]) { + dma_free_coherent(dev, MVNETA_TSO_PAGE_SIZE, + txq->tso_hdrs[i], + txq->tso_hdrs_phys[i]); + txq->tso_hdrs[i] = NULL; + } + } +} + +static int mvneta_alloc_tso_hdrs(struct mvneta_port *pp, + struct mvneta_tx_queue *txq) +{ + struct device *dev = pp->dev->dev.parent; + int i, num; + + num = DIV_ROUND_UP(txq->size, MVNETA_TSO_PER_PAGE); + for (i = 0; i < num; i++) { + txq->tso_hdrs[i] = dma_alloc_coherent(dev, MVNETA_TSO_PAGE_SIZE, + &txq->tso_hdrs_phys[i], + GFP_KERNEL); + if (!txq->tso_hdrs[i]) { + mvneta_free_tso_hdrs(pp, txq); + return -ENOMEM; + } + } + + return 0; +} + +static char *mvneta_get_tso_hdr(struct mvneta_tx_queue *txq, dma_addr_t *dma) +{ + int index, offset; + + index = txq->txq_put_index / MVNETA_TSO_PER_PAGE; + offset = (txq->txq_put_index % MVNETA_TSO_PER_PAGE) * TSO_HEADER_SIZE; + + *dma = txq->tso_hdrs_phys[index] + offset; + + return txq->tso_hdrs[index] + offset; +} + +static void mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq, + struct tso_t *tso, int size, bool is_last) { struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; int hdr_len = skb_tcp_all_headers(skb); struct mvneta_tx_desc *tx_desc; + dma_addr_t hdr_phys; + char *hdr; + + hdr = mvneta_get_tso_hdr(txq, &hdr_phys); + tso_build_hdr(skb, hdr, tso, size, is_last); tx_desc = mvneta_txq_next_desc_get(txq); tx_desc->data_size = hdr_len; tx_desc->command = mvneta_skb_tx_csum(skb); tx_desc->command |= MVNETA_TXD_F_DESC; - tx_desc->buf_phys_addr = txq->tso_hdrs_phys + - txq->txq_put_index * TSO_HEADER_SIZE; - buf->type = MVNETA_TYPE_SKB; + tx_desc->buf_phys_addr = hdr_phys; + buf->type = MVNETA_TYPE_TSO; buf->skb = NULL; mvneta_txq_inc_put(txq); @@ -2714,14 +2772,41 @@ mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq, return 0; } +static void mvneta_release_descs(struct mvneta_port *pp, + struct mvneta_tx_queue *txq, + int first, int num) +{ + int desc_idx, i; + + desc_idx = first + num; + if (desc_idx >= txq->size) + desc_idx -= txq->size; + + for (i = num; i >= 0; i--) { + struct mvneta_tx_desc *tx_desc = txq->descs + desc_idx; + struct mvneta_tx_buf *buf = &txq->buf[desc_idx]; + + if (buf->type == MVNETA_TYPE_SKB) + dma_unmap_single(pp->dev->dev.parent, + tx_desc->buf_phys_addr, + tx_desc->data_size, + DMA_TO_DEVICE); + + mvneta_txq_desc_put(txq); + + if (desc_idx == 0) + desc_idx = txq->size; + desc_idx -= 1; + } +} + static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev, struct mvneta_tx_queue *txq) { int hdr_len, total_len, data_left; - int desc_count = 0; + int first_desc, desc_count = 0; struct mvneta_port *pp = netdev_priv(dev); struct tso_t tso; - int i; /* Count needed descriptors */ if ((txq->count + tso_count_descs(skb)) >= txq->size) @@ -2732,22 +2817,19 @@ static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev, return 0; } + first_desc = txq->txq_put_index; + /* Initialize the TSO handler, and prepare the first payload */ hdr_len = tso_start(skb, &tso); total_len = skb->len - hdr_len; while (total_len > 0) { - char *hdr; - data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); total_len -= data_left; desc_count++; /* prepare packet headers: MAC + IP + TCP */ - hdr = txq->tso_hdrs + txq->txq_put_index * TSO_HEADER_SIZE; - tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0); - - mvneta_tso_put_hdr(skb, txq); + mvneta_tso_put_hdr(skb, txq, &tso, data_left, total_len == 0); while (data_left > 0) { int size; @@ -2772,15 +2854,7 @@ err_release: /* Release all used data descriptors; header descriptors must not * be DMA-unmapped. */ - for (i = desc_count - 1; i >= 0; i--) { - struct mvneta_tx_desc *tx_desc = txq->descs + i; - if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr)) - dma_unmap_single(pp->dev->dev.parent, - tx_desc->buf_phys_addr, - tx_desc->data_size, - DMA_TO_DEVICE); - mvneta_txq_desc_put(txq); - } + mvneta_release_descs(pp, txq, first_desc, desc_count - 1); return 0; } @@ -2790,6 +2864,7 @@ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb, { struct mvneta_tx_desc *tx_desc; int i, nr_frags = skb_shinfo(skb)->nr_frags; + int first_desc = txq->txq_put_index; for (i = 0; i < nr_frags; i++) { struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; @@ -2828,15 +2903,7 @@ error: /* Release all descriptors that were used to map fragments of * this packet, as well as the corresponding DMA mappings */ - for (i = i - 1; i >= 0; i--) { - tx_desc = txq->descs + i; - dma_unmap_single(pp->dev->dev.parent, - tx_desc->buf_phys_addr, - tx_desc->data_size, - DMA_TO_DEVICE); - mvneta_txq_desc_put(txq); - } - + mvneta_release_descs(pp, txq, first_desc, i - 1); return -ENOMEM; } @@ -3457,7 +3524,7 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp, static int mvneta_txq_sw_init(struct mvneta_port *pp, struct mvneta_tx_queue *txq) { - int cpu; + int cpu, err; txq->size = pp->tx_ring_size; @@ -3482,11 +3549,9 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp, return -ENOMEM; /* Allocate DMA buffers for TSO MAC/IP/TCP headers */ - txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent, - txq->size * TSO_HEADER_SIZE, - &txq->tso_hdrs_phys, GFP_KERNEL); - if (!txq->tso_hdrs) - return -ENOMEM; + err = mvneta_alloc_tso_hdrs(pp, txq); + if (err) + return err; /* Setup XPS mapping */ if (pp->neta_armada3700) @@ -3538,10 +3603,7 @@ static void mvneta_txq_sw_deinit(struct mvneta_port *pp, kfree(txq->buf); - if (txq->tso_hdrs) - dma_free_coherent(pp->dev->dev.parent, - txq->size * TSO_HEADER_SIZE, - txq->tso_hdrs, txq->tso_hdrs_phys); + mvneta_free_tso_hdrs(pp, txq); if (txq->descs) dma_free_coherent(pp->dev->dev.parent, txq->size * MVNETA_DESC_ALIGNED_SIZE, @@ -3550,7 +3612,6 @@ static void mvneta_txq_sw_deinit(struct mvneta_port *pp, netdev_tx_reset_queue(nq); txq->buf = NULL; - txq->tso_hdrs = NULL; txq->descs = NULL; txq->last_desc = 0; txq->next_desc_to_proc = 0; @@ -3941,8 +4002,8 @@ static void mvneta_pcs_get_state(struct phylink_pcs *pcs, state->pause |= MLO_PAUSE_TX; } -static int mvneta_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, phy_interface_t interface, +static int mvneta_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, + phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) { @@ -3955,7 +4016,7 @@ static int mvneta_pcs_config(struct phylink_pcs *pcs, MVNETA_GMAC_AN_FLOW_CTRL_EN | MVNETA_GMAC_AN_DUPLEX_EN; - if (phylink_autoneg_inband(mode)) { + if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) { mask |= MVNETA_GMAC_CONFIG_MII_SPEED | MVNETA_GMAC_CONFIG_GMII_SPEED | MVNETA_GMAC_CONFIG_FULL_DUPLEX; @@ -4295,7 +4356,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp) */ if (txq_number == 1) txq_map = (cpu == elected_cpu) ? - MVNETA_CPU_TXQ_ACCESS(1) : 0; + MVNETA_CPU_TXQ_ACCESS(0) : 0; else txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) & MVNETA_CPU_TXQ_ACCESS_ALL_MASK; @@ -5457,6 +5518,7 @@ static int mvneta_probe(struct platform_device *pdev) clk_prepare_enable(pp->clk_bus); pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops; + pp->phylink_pcs.neg_mode = true; pp->phylink_config.dev = &dev->dev; pp->phylink_config.type = PHYLINK_NETDEV; @@ -5821,6 +5883,8 @@ static int __init mvneta_driver_init(void) { int ret; + BUILD_BUG_ON_NOT_POWER_OF_2(MVNETA_TSO_PER_PAGE); + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/mvneta:online", mvneta_cpu_online, mvneta_cpu_down_prepare); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index adc953611913..0129afa1210e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5586,6 +5586,11 @@ static int mvpp2_ethtool_get_rxnfc(struct net_device *dev, break; case ETHTOOL_GRXCLSRLALL: for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) { + if (loc == info->rule_cnt) { + ret = -EMSGSIZE; + break; + } + if (port->rfs_rules[i]) rules[loc++] = i; } @@ -6168,8 +6173,7 @@ static void mvpp2_xlg_pcs_get_state(struct phylink_pcs *pcs, state->pause |= MLO_PAUSE_RX; } -static int mvpp2_xlg_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, +static int mvpp2_xlg_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -6232,7 +6236,7 @@ static void mvpp2_gmac_pcs_get_state(struct phylink_pcs *pcs, state->pause |= MLO_PAUSE_TX; } -static int mvpp2_gmac_pcs_config(struct phylink_pcs *pcs, unsigned int mode, +static int mvpp2_gmac_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -6246,7 +6250,7 @@ static int mvpp2_gmac_pcs_config(struct phylink_pcs *pcs, unsigned int mode, MVPP2_GMAC_FLOW_CTRL_AUTONEG | MVPP2_GMAC_AN_DUPLEX_EN; - if (phylink_autoneg_inband(mode)) { + if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) { mask |= MVPP2_GMAC_CONFIG_MII_SPEED | MVPP2_GMAC_CONFIG_GMII_SPEED | MVPP2_GMAC_CONFIG_FULL_DUPLEX; @@ -6649,8 +6653,9 @@ static void mvpp2_acpi_start(struct mvpp2_port *port) mvpp2_mac_prepare(&port->phylink_config, MLO_AN_INBAND, port->phy_interface); mvpp2_mac_config(&port->phylink_config, MLO_AN_INBAND, &state); - pcs->ops->pcs_config(pcs, MLO_AN_INBAND, port->phy_interface, - state.advertising, false); + pcs->ops->pcs_config(pcs, PHYLINK_PCS_NEG_INBAND_ENABLED, + port->phy_interface, state.advertising, + false); mvpp2_mac_finish(&port->phylink_config, MLO_AN_INBAND, port->phy_interface); mvpp2_mac_link_up(&port->phylink_config, NULL, @@ -6896,7 +6901,9 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->dev.of_node = port_node; port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops; + port->pcs_gmac.neg_mode = true; port->pcs_xlg.ops = &mvpp2_phylink_xlg_pcs_ops; + port->pcs_xlg.neg_mode = true; if (!mvpp2_use_acpi_compat_mode(port_fwnode)) { port->phylink_config.dev = &dev->dev; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c index 035ead7935c7..dab61cc1acb5 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c @@ -98,6 +98,9 @@ int octep_ctrl_mbox_init(struct octep_ctrl_mbox *mbox) writeq(OCTEP_CTRL_MBOX_STATUS_INIT, OCTEP_CTRL_MBOX_INFO_HOST_STATUS(mbox->barmem)); + mutex_init(&mbox->h2fq_lock); + mutex_init(&mbox->f2hq_lock); + mbox->h2fq.sz = readl(OCTEP_CTRL_MBOX_H2FQ_SZ(mbox->barmem)); mbox->h2fq.hw_prod = OCTEP_CTRL_MBOX_H2FQ_PROD(mbox->barmem); mbox->h2fq.hw_cons = OCTEP_CTRL_MBOX_H2FQ_CONS(mbox->barmem); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c index 1cc6af2feb38..565320ec24f8 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c @@ -55,7 +55,7 @@ static int octep_send_mbox_req(struct octep_device *oct, list_add_tail(&d->list, &oct->ctrl_req_wait_list); ret = wait_event_interruptible_timeout(oct->ctrl_req_wait_q, (d->done != 0), - jiffies + msecs_to_jiffies(500)); + msecs_to_jiffies(500)); list_del(&d->list); if (ret == 0 || ret == 1) return -EAGAIN; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 43eb6e871351..4424de2ffd70 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1038,6 +1038,10 @@ static void octep_device_cleanup(struct octep_device *oct) { int i; + oct->poll_non_ioq_intr = false; + cancel_delayed_work_sync(&oct->intr_poll_task); + cancel_work_sync(&oct->ctrl_mbox_task); + dev_info(&oct->pdev->dev, "Cleaning up Octeon Device ...\n"); for (i = 0; i < OCTEP_MAX_VF; i++) { @@ -1200,14 +1204,11 @@ static void octep_remove(struct pci_dev *pdev) if (!oct) return; - cancel_work_sync(&oct->tx_timeout_task); - cancel_work_sync(&oct->ctrl_mbox_task); netdev = oct->netdev; if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); - oct->poll_non_ioq_intr = false; - cancel_delayed_work_sync(&oct->intr_poll_task); + cancel_work_sync(&oct->tx_timeout_task); octep_device_cleanup(oct); pci_release_mem_regions(pdev); free_netdev(netdev); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c index 392d9b0da0d7..3c43f8078528 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c @@ -158,7 +158,7 @@ static int octep_setup_oq(struct octep_device *oct, int q_no) goto desc_dma_alloc_err; } - oq->buff_info = vzalloc(oq->max_count * OCTEP_OQ_RECVBUF_SIZE); + oq->buff_info = vcalloc(oq->max_count, OCTEP_OQ_RECVBUF_SIZE); if (unlikely(!oq->buff_info)) { dev_err(&oct->pdev->dev, "Failed to allocate buffer info for OQ-%d\n", q_no); diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig index 993ac180a5db..a32d85d6f599 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Kconfig +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig @@ -32,6 +32,7 @@ config OCTEONTX2_PF tristate "Marvell OcteonTX2 NIC Physical Function driver" select OCTEONTX2_MBOX select NET_DEVLINK + select PAGE_POOL depends on (64BIT && COMPILE_TEST) || ARM64 select DIMLIB depends on PCI diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index bd77152bb8d7..592037f4e55b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -169,6 +169,9 @@ void cgx_lmac_write(int cgx_id, int lmac_id, u64 offset, u64 val) { struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + /* Software must not access disabled LMAC registers */ + if (!is_lmac_valid(cgx_dev, lmac_id)) + return; cgx_write(cgx_dev, lmac_id, offset, val); } @@ -176,6 +179,10 @@ u64 cgx_lmac_read(int cgx_id, int lmac_id, u64 offset) { struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + /* Software must not access disabled LMAC registers */ + if (!is_lmac_valid(cgx_dev, lmac_id)) + return 0; + return cgx_read(cgx_dev, lmac_id, offset); } @@ -530,14 +537,15 @@ static u32 cgx_get_lmac_fifo_len(void *cgxd, int lmac_id) int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable) { struct cgx *cgx = cgxd; - u8 lmac_type; + struct lmac *lmac; u64 cfg; if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; - lmac_type = cgx->mac_ops->get_lmac_type(cgx, lmac_id); - if (lmac_type == LMAC_MODE_SGMII || lmac_type == LMAC_MODE_QSGMII) { + lmac = lmac_pdata(lmac_id, cgx); + if (lmac->lmac_type == LMAC_MODE_SGMII || + lmac->lmac_type == LMAC_MODE_QSGMII) { cfg = cgx_read(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL); if (enable) cfg |= CGXX_GMP_PCS_MRX_CTL_LBK; @@ -1556,6 +1564,23 @@ int cgx_lmac_linkup_start(void *cgxd) return 0; } +int cgx_lmac_reset(void *cgxd, int lmac_id, u8 pf_req_flr) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + /* Resetting PFC related CSRs */ + cfg = 0xff; + cgx_write(cgxd, lmac_id, CGXX_CMRX_RX_LOGL_XON, cfg); + + if (pf_req_flr) + cgx_lmac_internal_loopback(cgxd, lmac_id, false); + return 0; +} + static int cgx_configure_interrupt(struct cgx *cgx, struct lmac *lmac, int cnt, bool req_free) { @@ -1675,6 +1700,7 @@ static int cgx_lmac_init(struct cgx *cgx) cgx->lmac_idmap[lmac->lmac_id] = lmac; set_bit(lmac->lmac_id, &cgx->lmac_bmap); cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true); + lmac->lmac_type = cgx->mac_ops->get_lmac_type(cgx, lmac->lmac_id); } return cgx_lmac_verify_fwi_version(cgx); @@ -1771,6 +1797,7 @@ static struct mac_ops cgx_mac_ops = { .mac_tx_enable = cgx_lmac_tx_enable, .pfc_config = cgx_lmac_pfc_config, .mac_get_pfc_frm_cfg = cgx_lmac_get_pfc_frm_cfg, + .mac_reset = cgx_lmac_reset, }; static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 5a20d93004c7..574114179688 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -35,6 +35,7 @@ #define CGXX_CMRX_INT_ENA_W1S 0x058 #define CGXX_CMRX_RX_ID_MAP 0x060 #define CGXX_CMRX_RX_STAT0 0x070 +#define CGXX_CMRX_RX_LOGL_XON 0x100 #define CGXX_CMRX_RX_LMACS 0x128 #define CGXX_CMRX_RX_DMAC_CTL0 (0x1F8 + mac_ops->csr_offset) #define CGX_DMAC_CTL0_CAM_ENABLE BIT_ULL(3) @@ -181,4 +182,5 @@ int cgx_lmac_get_pfc_frm_cfg(void *cgxd, int lmac_id, u8 *tx_pause, u8 *rx_pause); int verify_lmac_fc_cfg(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, int pfvf_idx); +int cgx_lmac_reset(void *cgxd, int lmac_id, u8 pf_req_flr); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h index 8931864ee110..2436c1ff9ba4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -142,9 +142,16 @@ enum nix_scheduler { #define TXSCH_RR_QTM_MAX ((1 << 24) - 1) #define TXSCH_TL1_DFLT_RR_QTM TXSCH_RR_QTM_MAX -#define TXSCH_TL1_DFLT_RR_PRIO (0x1ull) +#define TXSCH_TL1_DFLT_RR_PRIO (0x7ull) #define CN10K_MAX_DWRR_WEIGHT 16384 /* Weight is 14bit on CN10K */ +/* Don't change the order as on CN10K (except CN10KB) + * SMQX_CFG[SDP] value should be 1 for SDP flows. + */ +#define SMQ_LINK_TYPE_RPM 0 +#define SMQ_LINK_TYPE_SDP 1 +#define SMQ_LINK_TYPE_LBK 2 + /* Min/Max packet sizes, excluding FCS */ #define NIC_HW_MIN_FRS 40 #define NIC_HW_MAX_FRS 9212 diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index 39aaf0e4467d..0b4cba03f2e8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -24,6 +24,7 @@ * @cgx: parent cgx port * @mcast_filters_count: Number of multicast filters installed * @lmac_id: lmac port id + * @lmac_type: lmac type like SGMII/XAUI * @cmd_pend: flag set before new command is started * flag cleared after command response is received * @name: lmac port name @@ -43,6 +44,7 @@ struct lmac { struct cgx *cgx; u8 mcast_filters_count; u8 lmac_id; + u8 lmac_type; bool cmd_pend; char *name; }; @@ -125,6 +127,7 @@ struct mac_ops { int (*mac_get_pfc_frm_cfg)(void *cgxd, int lmac_id, u8 *tx_pause, u8 *rx_pause); + int (*mac_reset)(void *cgxd, int lmac_id, u8 pf_req_flr); /* FEC stats */ int (*get_fec_stats)(void *cgxd, int lmac_id, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 6389ed83637d..eba307eee2b2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1080,6 +1080,8 @@ struct nix_vtag_config_rsp { */ }; +#define NIX_FLOW_KEY_TYPE_L3_L4_MASK (~(0xf << 28)) + struct nix_rss_flowkey_cfg { struct mbox_msghdr hdr; int mcam_index; /* MCAM entry index to modify */ @@ -1105,6 +1107,10 @@ struct nix_rss_flowkey_cfg { #define NIX_FLOW_KEY_TYPE_IPV4_PROTO BIT(21) #define NIX_FLOW_KEY_TYPE_AH BIT(22) #define NIX_FLOW_KEY_TYPE_ESP BIT(23) +#define NIX_FLOW_KEY_TYPE_L4_DST_ONLY BIT(28) +#define NIX_FLOW_KEY_TYPE_L4_SRC_ONLY BIT(29) +#define NIX_FLOW_KEY_TYPE_L3_DST_ONLY BIT(30) +#define NIX_FLOW_KEY_TYPE_L3_SRC_ONLY BIT(31) u32 flowkey_cfg; /* Flowkey types selected */ u8 group; /* RSS context or group */ }; @@ -1151,6 +1157,7 @@ struct nix_rx_cfg { struct mbox_msghdr hdr; #define NIX_RX_OL3_VERIFY BIT(0) #define NIX_RX_OL4_VERIFY BIT(1) +#define NIX_RX_DROP_RE BIT(2) u8 len_verify; /* Outer L3/L4 len check */ #define NIX_RX_CSUM_OL4_VERIFY BIT(0) u8 csum_verify; /* Outer L4 checksum verification */ @@ -1239,7 +1246,9 @@ struct nix_hw_info { u16 min_mtu; u32 rpm_dwrr_mtu; u32 sdp_dwrr_mtu; - u64 rsvd[16]; /* Add reserved fields for future expansion */ + u32 lbk_dwrr_mtu; + u32 rsvd32[1]; + u64 rsvd[15]; /* Add reserved fields for future expansion */ }; struct nix_bandprof_alloc_req { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c index 3411e2e47d46..0ee420a489fc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c @@ -208,7 +208,7 @@ struct ptp *ptp_get(void) /* Check driver is bound to PTP block */ if (!ptp) ptp = ERR_PTR(-EPROBE_DEFER); - else + else if (!IS_ERR(ptp)) pci_dev_get(ptp->pdev); return ptp; @@ -388,11 +388,10 @@ static int ptp_extts_on(struct ptp *ptp, int on) static int ptp_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { - struct device *dev = &pdev->dev; struct ptp *ptp; int err; - ptp = devm_kzalloc(dev, sizeof(*ptp), GFP_KERNEL); + ptp = kzalloc(sizeof(*ptp), GFP_KERNEL); if (!ptp) { err = -ENOMEM; goto error; @@ -428,20 +427,19 @@ static int ptp_probe(struct pci_dev *pdev, return 0; error_free: - devm_kfree(dev, ptp); + kfree(ptp); error: /* For `ptp_get()` we need to differentiate between the case * when the core has not tried to probe this device and the case when - * the probe failed. In the later case we pretend that the - * initialization was successful and keep the error in + * the probe failed. In the later case we keep the error in * `dev->driver_data`. */ pci_set_drvdata(pdev, ERR_PTR(err)); if (!first_ptp_block) first_ptp_block = ERR_PTR(err); - return 0; + return err; } static void ptp_remove(struct pci_dev *pdev) @@ -449,16 +447,17 @@ static void ptp_remove(struct pci_dev *pdev) struct ptp *ptp = pci_get_drvdata(pdev); u64 clock_cfg; - if (cn10k_ptp_errata(ptp) && hrtimer_active(&ptp->hrtimer)) - hrtimer_cancel(&ptp->hrtimer); - if (IS_ERR_OR_NULL(ptp)) return; + if (cn10k_ptp_errata(ptp) && hrtimer_active(&ptp->hrtimer)) + hrtimer_cancel(&ptp->hrtimer); + /* Disable PTP clock */ clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG); clock_cfg &= ~PTP_CLOCK_CFG_PTP_EN; writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG); + kfree(ptp); } static const struct pci_device_id ptp_id_table[] = { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index de0d88dd10d6..af21e2030cff 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -37,6 +37,7 @@ static struct mac_ops rpm_mac_ops = { .mac_tx_enable = rpm_lmac_tx_enable, .pfc_config = rpm_lmac_pfc_config, .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, + .mac_reset = rpm_lmac_reset, }; static struct mac_ops rpm2_mac_ops = { @@ -47,7 +48,7 @@ static struct mac_ops rpm2_mac_ops = { .int_set_reg = RPM2_CMRX_SW_INT_ENA_W1S, .irq_offset = 1, .int_ena_bit = BIT_ULL(0), - .lmac_fwi = RPM_LMAC_FWI, + .lmac_fwi = RPM2_LMAC_FWI, .non_contiguous_serdes_lane = true, .rx_stats_cnt = 43, .tx_stats_cnt = 34, @@ -68,6 +69,7 @@ static struct mac_ops rpm2_mac_ops = { .mac_tx_enable = rpm_lmac_tx_enable, .pfc_config = rpm_lmac_pfc_config, .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, + .mac_reset = rpm_lmac_reset, }; bool is_dev_rpm2(void *rpmd) @@ -353,8 +355,8 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause, void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable) { + u64 cfg, pfc_class_mask_cfg; rpm_t *rpm = rpmd; - u64 cfg; /* ALL pause frames received are completely ignored */ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); @@ -378,9 +380,11 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable) rpm_write(rpm, 0, RPMX_CMR_CHAN_MSK_OR, ~0ULL); /* Disable all PFC classes */ - cfg = rpm_read(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL); + pfc_class_mask_cfg = is_dev_rpm2(rpm) ? RPM2_CMRX_PRT_CBFC_CTL : + RPMX_CMRX_PRT_CBFC_CTL; + cfg = rpm_read(rpm, lmac_id, pfc_class_mask_cfg); cfg = FIELD_SET(RPM_PFC_CLASS_MASK, 0, cfg); - rpm_write(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL, cfg); + rpm_write(rpm, lmac_id, pfc_class_mask_cfg, cfg); } int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat) @@ -537,14 +541,15 @@ u32 rpm2_get_lmac_fifo_len(void *rpmd, int lmac_id) int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable) { rpm_t *rpm = rpmd; - u8 lmac_type; + struct lmac *lmac; u64 cfg; if (!is_lmac_valid(rpm, lmac_id)) return -ENODEV; - lmac_type = rpm->mac_ops->get_lmac_type(rpm, lmac_id); - if (lmac_type == LMAC_MODE_QSGMII || lmac_type == LMAC_MODE_SGMII) { + lmac = lmac_pdata(lmac_id, rpm); + if (lmac->lmac_type == LMAC_MODE_QSGMII || + lmac->lmac_type == LMAC_MODE_SGMII) { dev_err(&rpm->pdev->dev, "loopback not supported for LPC mode\n"); return 0; } @@ -602,8 +607,11 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 p if (!is_lmac_valid(rpm, lmac_id)) return -ENODEV; + pfc_class_mask_cfg = is_dev_rpm2(rpm) ? RPM2_CMRX_PRT_CBFC_CTL : + RPMX_CMRX_PRT_CBFC_CTL; + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - class_en = rpm_read(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL); + class_en = rpm_read(rpm, lmac_id, pfc_class_mask_cfg); pfc_en |= FIELD_GET(RPM_PFC_CLASS_MASK, class_en); if (rx_pause) { @@ -632,10 +640,6 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 p cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE; rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - pfc_class_mask_cfg = is_dev_rpm2(rpm) ? RPM2_CMRX_PRT_CBFC_CTL : - RPMX_CMRX_PRT_CBFC_CTL; - rpm_write(rpm, lmac_id, pfc_class_mask_cfg, class_en); return 0; @@ -713,3 +717,24 @@ int rpm_get_fec_stats(void *rpmd, int lmac_id, struct cgx_fec_stats_rsp *rsp) return 0; } + +int rpm_lmac_reset(void *rpmd, int lmac_id, u8 pf_req_flr) +{ + u64 rx_logl_xon, cfg; + rpm_t *rpm = rpmd; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + /* Resetting PFC related CSRs */ + rx_logl_xon = is_dev_rpm2(rpm) ? RPM2_CMRX_RX_LOGL_XON : + RPMX_CMRX_RX_LOGL_XON; + cfg = 0xff; + + rpm_write(rpm, lmac_id, rx_logl_xon, cfg); + + if (pf_req_flr) + rpm_lmac_internal_loopback(rpm, lmac_id, false); + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index 22147b4c2137..b79cfbc6f877 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -74,6 +74,7 @@ #define RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA 0x80A8 #define RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA 0x8108 #define RPM_DEFAULT_PAUSE_TIME 0x7FF +#define RPMX_CMRX_RX_LOGL_XON 0x4100 #define RPMX_MTI_MAC100X_XIF_MODE 0x8100 #define RPMX_ONESTEP_ENABLE BIT_ULL(5) @@ -94,7 +95,8 @@ /* CN10KB CSR Declaration */ #define RPM2_CMRX_SW_INT 0x1b0 -#define RPM2_CMRX_SW_INT_ENA_W1S 0x1b8 +#define RPM2_CMRX_SW_INT_ENA_W1S 0x1c8 +#define RPM2_LMAC_FWI 0x12 #define RPM2_CMR_CHAN_MSK_OR 0x3120 #define RPM2_CMR_RX_OVR_BP_EN BIT_ULL(2) #define RPM2_CMR_RX_OVR_BP_BP BIT_ULL(1) @@ -131,4 +133,5 @@ int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause, int rpm2_get_nr_lmacs(void *rpmd); bool is_dev_rpm2(void *rpmd); int rpm_get_fec_stats(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp); +int rpm_lmac_reset(void *rpmd, int lmac_id, u8 pf_req_flr); #endif /* RPM_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 9f673bda9dbd..73df2d564545 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2629,6 +2629,7 @@ static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc) * Since LF is detached use LF number as -1. */ rvu_npc_free_mcam_entries(rvu, pcifunc, -1); + rvu_mac_reset(rvu, pcifunc); mutex_unlock(&rvu->flr_lock); } @@ -3044,9 +3045,8 @@ static int rvu_flr_init(struct rvu *rvu) cfg | BIT_ULL(22)); } - rvu->flr_wq = alloc_workqueue("rvu_afpf_flr", - WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, - 1); + rvu->flr_wq = alloc_ordered_workqueue("rvu_afpf_flr", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!rvu->flr_wq) return -ENOMEM; @@ -3252,7 +3252,7 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id) rvu->ptp = ptp_get(); if (IS_ERR(rvu->ptp)) { err = PTR_ERR(rvu->ptp); - if (err == -EPROBE_DEFER) + if (err) goto err_release_regions; rvu->ptp = NULL; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index d655bf04a483..e8e65fd7888d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -23,6 +23,7 @@ #define PCI_DEVID_OCTEONTX2_LBK 0xA061 /* Subsystem Device ID */ +#define PCI_SUBSYS_DEVID_98XX 0xB100 #define PCI_SUBSYS_DEVID_96XX 0xB200 #define PCI_SUBSYS_DEVID_CN10K_A 0xB900 #define PCI_SUBSYS_DEVID_CNF10K_B 0xBC00 @@ -285,6 +286,22 @@ struct nix_mark_format { u32 *cfg; }; +/* smq(flush) to tl1 cir/pir info */ +struct nix_smq_tree_ctx { + u64 cir_off; + u64 cir_val; + u64 pir_off; + u64 pir_val; +}; + +/* smq flush context */ +struct nix_smq_flush_ctx { + int smq; + u16 tl1_schq; + u16 tl2_schq; + struct nix_smq_tree_ctx smq_tree_ctx[NIX_TXSCH_LVL_CNT]; +}; + struct npc_pkind { struct rsrc_bmap rsrc; u32 *pfchan_map; @@ -346,6 +363,7 @@ struct hw_cap { bool per_pf_mbox_regs; /* PF mbox specified in per PF registers ? */ bool programmable_chans; /* Channels programmable ? */ bool ipolicer; + bool nix_multiple_dwrr_mtu; /* Multiple DWRR_MTU to choose from */ bool npc_hash_extract; /* Hash extract enabled ? */ bool npc_exact_match_enabled; /* Exact match supported ? */ }; @@ -669,6 +687,16 @@ static inline u16 rvu_nix_chan_cpt(struct rvu *rvu, u8 chan) return rvu->hw->cpt_chan_base + chan; } +static inline bool is_rvu_supports_nix1(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + if (pdev->subsystem_device == PCI_SUBSYS_DEVID_98XX) + return true; + + return false; +} + /* Function Prototypes * RVU */ @@ -802,8 +830,11 @@ int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw, struct nix_cn10k_aq_enq_rsp *aq_rsp, u16 pcifunc, u8 ctype, u32 qidx); int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc); +int nix_get_dwrr_mtu_reg(struct rvu_hwinfo *hw, int smq_link_type); u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu); u32 convert_bytes_to_dwrr_mtu(u32 bytes); +void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, u16 pcifunc, + struct nix_txsch *txsch, bool enable); /* NPC APIs */ void rvu_npc_freemem(struct rvu *rvu); @@ -864,6 +895,7 @@ int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable); int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause, u16 pfc_en); int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause); +void rvu_mac_reset(struct rvu *rvu, u16 pcifunc); u32 rvu_cgx_get_lmac_fifolen(struct rvu *rvu, int cgx, int lmac); int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, int type); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 83b342fa8d75..095b2cc4a699 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -114,7 +114,7 @@ static void rvu_map_cgx_nix_block(struct rvu *rvu, int pf, p2x = cgx_lmac_get_p2x(cgx_id, lmac_id); /* Firmware sets P2X_SELECT as either NIX0 or NIX1 */ pfvf->nix_blkaddr = BLKADDR_NIX0; - if (p2x == CMR_P2X_SEL_NIX1) + if (is_rvu_supports_nix1(rvu) && p2x == CMR_P2X_SEL_NIX1) pfvf->nix_blkaddr = BLKADDR_NIX1; } @@ -763,7 +763,7 @@ static int rvu_cgx_ptp_rx_cfg(struct rvu *rvu, u16 pcifunc, bool enable) cgxd = rvu_cgx_pdata(cgx_id, rvu); mac_ops = get_mac_ops(cgxd); - mac_ops->mac_enadis_ptp_config(cgxd, lmac_id, true); + mac_ops->mac_enadis_ptp_config(cgxd, lmac_id, enable); /* If PTP is enabled then inform NPC that packets to be * parsed by this PF will have their data shifted by 8 bytes * and if PTP is disabled then no shift is required @@ -1250,3 +1250,21 @@ int rvu_mbox_handler_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, mac_ops->mac_get_pfc_frm_cfg(cgxd, lmac_id, &rsp->tx_pause, &rsp->rx_pause); return err; } + +void rvu_mac_reset(struct rvu *rvu, u16 pcifunc) +{ + int pf = rvu_get_pf(pcifunc); + struct mac_ops *mac_ops; + struct cgx *cgxd; + u8 cgx, lmac; + + if (!is_pf_cgxmapped(rvu, pf)) + return; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx, &lmac); + cgxd = rvu_cgx_pdata(cgx, rvu); + mac_ops = get_mac_ops(cgxd); + + if (mac_ops->mac_reset(cgxd, lmac, !is_vf(pcifunc))) + dev_err(rvu->dev, "Failed to reset MAC\n"); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 9533b1d92960..3b26893efdf8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -1222,6 +1222,11 @@ static int rvu_dbg_npa_ctx_display(struct seq_file *m, void *unused, int ctype) for (aura = id; aura < max_id; aura++) { aq_req.aura_id = aura; + + /* Skip if queue is uninitialized */ + if (ctype == NPA_AQ_CTYPE_POOL && !test_bit(aura, pfvf->pool_bmap)) + continue; + seq_printf(m, "======%s : %d=======\n", (ctype == NPA_AQ_CTYPE_AURA) ? "AURA" : "POOL", aq_req.aura_id); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index e4407f09c9d3..41df5ac23f92 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1413,7 +1413,8 @@ static int rvu_af_dl_dwrr_mtu_set(struct devlink *devlink, u32 id, u64 dwrr_mtu; dwrr_mtu = convert_bytes_to_dwrr_mtu(ctx->val.vu32); - rvu_write64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU, dwrr_mtu); + rvu_write64(rvu, BLKADDR_NIX0, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_RPM), dwrr_mtu); return 0; } @@ -1428,7 +1429,8 @@ static int rvu_af_dl_dwrr_mtu_get(struct devlink *devlink, u32 id, if (!rvu->hw->cap.nix_common_dwrr_mtu) return -EOPNOTSUPP; - dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU); + dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_RPM)); ctx->val.vu32 = convert_dwrr_mtu_to_bytes(dwrr_mtu); return 0; @@ -1438,6 +1440,7 @@ enum rvu_af_dl_param_id { RVU_AF_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU, RVU_AF_DEVLINK_PARAM_ID_NPC_EXACT_FEATURE_DISABLE, + RVU_AF_DEVLINK_PARAM_ID_NPC_MCAM_ZONE_PERCENT, }; static int rvu_af_npc_exact_feature_get(struct devlink *devlink, u32 id, @@ -1494,6 +1497,67 @@ static int rvu_af_npc_exact_feature_validate(struct devlink *devlink, u32 id, return -EFAULT; } +static int rvu_af_dl_npc_mcam_high_zone_percent_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + struct npc_mcam *mcam; + u32 percent; + + mcam = &rvu->hw->mcam; + percent = (mcam->hprio_count * 100) / mcam->bmap_entries; + ctx->val.vu8 = (u8)percent; + + return 0; +} + +static int rvu_af_dl_npc_mcam_high_zone_percent_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + struct npc_mcam *mcam; + u32 percent; + + percent = ctx->val.vu8; + mcam = &rvu->hw->mcam; + mcam->hprio_count = (mcam->bmap_entries * percent) / 100; + mcam->hprio_end = mcam->hprio_count; + mcam->lprio_count = (mcam->bmap_entries - mcam->hprio_count) / 2; + mcam->lprio_start = mcam->bmap_entries - mcam->lprio_count; + + return 0; +} + +static int rvu_af_dl_npc_mcam_high_zone_percent_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + struct npc_mcam *mcam; + + /* The percent of high prio zone must range from 12% to 100% of unreserved mcam space */ + if (val.vu8 < 12 || val.vu8 > 100) { + NL_SET_ERR_MSG_MOD(extack, + "mcam high zone percent must be between 12% to 100%"); + return -EINVAL; + } + + /* Do not allow user to modify the high priority zone entries while mcam entries + * have already been assigned. + */ + mcam = &rvu->hw->mcam; + if (mcam->bmap_fcnt < mcam->bmap_entries) { + NL_SET_ERR_MSG_MOD(extack, + "mcam entries have already been assigned, can't resize"); + return -EPERM; + } + + return 0; +} + static const struct devlink_param rvu_af_dl_params[] = { DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU, "dwrr_mtu", DEVLINK_PARAM_TYPE_U32, @@ -1509,6 +1573,12 @@ static const struct devlink_param rvu_af_dl_param_exact_match[] = { rvu_af_npc_exact_feature_get, rvu_af_npc_exact_feature_disable, rvu_af_npc_exact_feature_validate), + DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_NPC_MCAM_ZONE_PERCENT, + "npc_mcam_high_zone_percent", DEVLINK_PARAM_TYPE_U8, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + rvu_af_dl_npc_mcam_high_zone_percent_get, + rvu_af_dl_npc_mcam_high_zone_percent_set, + rvu_af_dl_npc_mcam_high_zone_percent_validate), }; /* Devlink switch mode */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index f01d057ad025..23c2f2ed2fb8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -191,6 +191,18 @@ struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr) return NULL; } +int nix_get_dwrr_mtu_reg(struct rvu_hwinfo *hw, int smq_link_type) +{ + if (hw->cap.nix_multiple_dwrr_mtu) + return NIX_AF_DWRR_MTUX(smq_link_type); + + if (smq_link_type == SMQ_LINK_TYPE_SDP) + return NIX_AF_DWRR_SDP_MTU; + + /* Here it's same reg for RPM and LBK */ + return NIX_AF_DWRR_RPM_MTU; +} + u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu) { dwrr_mtu &= 0x1FULL; @@ -834,6 +846,21 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, return 0; } +static void nix_get_aq_req_smq(struct rvu *rvu, struct nix_aq_enq_req *req, + u16 *smq, u16 *smq_mask) +{ + struct nix_cn10k_aq_enq_req *aq_req; + + if (!is_rvu_otx2(rvu)) { + aq_req = (struct nix_cn10k_aq_enq_req *)req; + *smq = aq_req->sq.smq; + *smq_mask = aq_req->sq_mask.smq; + } else { + *smq = req->sq.smq; + *smq_mask = req->sq_mask.smq; + } +} + static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw, struct nix_aq_enq_req *req, struct nix_aq_enq_rsp *rsp) @@ -845,6 +872,7 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw, struct rvu_block *block; struct admin_queue *aq; struct rvu_pfvf *pfvf; + u16 smq, smq_mask; void *ctx, *mask; bool ena; u64 cfg; @@ -916,13 +944,14 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw, if (rc) return rc; + nix_get_aq_req_smq(rvu, req, &smq, &smq_mask); /* Check if SQ pointed SMQ belongs to this PF/VF or not */ if (req->ctype == NIX_AQ_CTYPE_SQ && ((req->op == NIX_AQ_INSTOP_INIT && req->sq.ena) || (req->op == NIX_AQ_INSTOP_WRITE && - req->sq_mask.ena && req->sq_mask.smq && req->sq.ena))) { + req->sq_mask.ena && req->sq.ena && smq_mask))) { if (!is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_SMQ, - pcifunc, req->sq.smq)) + pcifunc, smq)) return NIX_AF_ERR_AQ_ENQUEUE; } @@ -1691,6 +1720,42 @@ exit: return true; } +static void nix_reset_tx_schedule(struct rvu *rvu, int blkaddr, + int lvl, int schq) +{ + u64 tlx_parent = 0, tlx_schedule = 0; + + switch (lvl) { + case NIX_TXSCH_LVL_TL2: + tlx_parent = NIX_AF_TL2X_PARENT(schq); + tlx_schedule = NIX_AF_TL2X_SCHEDULE(schq); + break; + case NIX_TXSCH_LVL_TL3: + tlx_parent = NIX_AF_TL3X_PARENT(schq); + tlx_schedule = NIX_AF_TL3X_SCHEDULE(schq); + break; + case NIX_TXSCH_LVL_TL4: + tlx_parent = NIX_AF_TL4X_PARENT(schq); + tlx_schedule = NIX_AF_TL4X_SCHEDULE(schq); + break; + case NIX_TXSCH_LVL_MDQ: + /* no need to reset SMQ_CFG as HW clears this CSR + * on SMQ flush + */ + tlx_parent = NIX_AF_MDQX_PARENT(schq); + tlx_schedule = NIX_AF_MDQX_SCHEDULE(schq); + break; + default: + return; + } + + if (tlx_parent) + rvu_write64(rvu, blkaddr, tlx_parent, 0x0); + + if (tlx_schedule) + rvu_write64(rvu, blkaddr, tlx_schedule, 0x0); +} + /* Disable shaping of pkts by a scheduler queue * at a given scheduler level. */ @@ -2040,6 +2105,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu, pfvf_map[schq] = TXSCH_MAP(pcifunc, 0); nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq); nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq); + nix_reset_tx_schedule(rvu, blkaddr, lvl, schq); } for (idx = 0; idx < req->schq[lvl]; idx++) { @@ -2049,6 +2115,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu, pfvf_map[schq] = TXSCH_MAP(pcifunc, 0); nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq); nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq); + nix_reset_tx_schedule(rvu, blkaddr, lvl, schq); } } @@ -2065,9 +2132,121 @@ exit: return rc; } +static void nix_smq_flush_fill_ctx(struct rvu *rvu, int blkaddr, int smq, + struct nix_smq_flush_ctx *smq_flush_ctx) +{ + struct nix_smq_tree_ctx *smq_tree_ctx; + u64 parent_off, regval; + u16 schq; + int lvl; + + smq_flush_ctx->smq = smq; + + schq = smq; + for (lvl = NIX_TXSCH_LVL_SMQ; lvl <= NIX_TXSCH_LVL_TL1; lvl++) { + smq_tree_ctx = &smq_flush_ctx->smq_tree_ctx[lvl]; + if (lvl == NIX_TXSCH_LVL_TL1) { + smq_flush_ctx->tl1_schq = schq; + smq_tree_ctx->cir_off = NIX_AF_TL1X_CIR(schq); + smq_tree_ctx->pir_off = 0; + smq_tree_ctx->pir_val = 0; + parent_off = 0; + } else if (lvl == NIX_TXSCH_LVL_TL2) { + smq_flush_ctx->tl2_schq = schq; + smq_tree_ctx->cir_off = NIX_AF_TL2X_CIR(schq); + smq_tree_ctx->pir_off = NIX_AF_TL2X_PIR(schq); + parent_off = NIX_AF_TL2X_PARENT(schq); + } else if (lvl == NIX_TXSCH_LVL_TL3) { + smq_tree_ctx->cir_off = NIX_AF_TL3X_CIR(schq); + smq_tree_ctx->pir_off = NIX_AF_TL3X_PIR(schq); + parent_off = NIX_AF_TL3X_PARENT(schq); + } else if (lvl == NIX_TXSCH_LVL_TL4) { + smq_tree_ctx->cir_off = NIX_AF_TL4X_CIR(schq); + smq_tree_ctx->pir_off = NIX_AF_TL4X_PIR(schq); + parent_off = NIX_AF_TL4X_PARENT(schq); + } else if (lvl == NIX_TXSCH_LVL_MDQ) { + smq_tree_ctx->cir_off = NIX_AF_MDQX_CIR(schq); + smq_tree_ctx->pir_off = NIX_AF_MDQX_PIR(schq); + parent_off = NIX_AF_MDQX_PARENT(schq); + } + /* save cir/pir register values */ + smq_tree_ctx->cir_val = rvu_read64(rvu, blkaddr, smq_tree_ctx->cir_off); + if (smq_tree_ctx->pir_off) + smq_tree_ctx->pir_val = rvu_read64(rvu, blkaddr, smq_tree_ctx->pir_off); + + /* get parent txsch node */ + if (parent_off) { + regval = rvu_read64(rvu, blkaddr, parent_off); + schq = (regval >> 16) & 0x1FF; + } + } +} + +static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr, + struct nix_smq_flush_ctx *smq_flush_ctx, bool enable) +{ + struct nix_txsch *txsch; + struct nix_hw *nix_hw; + u64 regoff; + int tl2; + + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return; + + /* loop through all TL2s with matching PF_FUNC */ + txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL2]; + for (tl2 = 0; tl2 < txsch->schq.max; tl2++) { + /* skip the smq(flush) TL2 */ + if (tl2 == smq_flush_ctx->tl2_schq) + continue; + /* skip unused TL2s */ + if (TXSCH_MAP_FLAGS(txsch->pfvf_map[tl2]) & NIX_TXSCHQ_FREE) + continue; + /* skip if PF_FUNC doesn't match */ + if ((TXSCH_MAP_FUNC(txsch->pfvf_map[tl2]) & ~RVU_PFVF_FUNC_MASK) != + (TXSCH_MAP_FUNC(txsch->pfvf_map[smq_flush_ctx->tl2_schq] & + ~RVU_PFVF_FUNC_MASK))) + continue; + /* enable/disable XOFF */ + regoff = NIX_AF_TL2X_SW_XOFF(tl2); + if (enable) + rvu_write64(rvu, blkaddr, regoff, 0x1); + else + rvu_write64(rvu, blkaddr, regoff, 0x0); + } +} + +static void nix_smq_flush_enadis_rate(struct rvu *rvu, int blkaddr, + struct nix_smq_flush_ctx *smq_flush_ctx, bool enable) +{ + u64 cir_off, pir_off, cir_val, pir_val; + struct nix_smq_tree_ctx *smq_tree_ctx; + int lvl; + + for (lvl = NIX_TXSCH_LVL_SMQ; lvl <= NIX_TXSCH_LVL_TL1; lvl++) { + smq_tree_ctx = &smq_flush_ctx->smq_tree_ctx[lvl]; + cir_off = smq_tree_ctx->cir_off; + cir_val = smq_tree_ctx->cir_val; + pir_off = smq_tree_ctx->pir_off; + pir_val = smq_tree_ctx->pir_val; + + if (enable) { + rvu_write64(rvu, blkaddr, cir_off, cir_val); + if (lvl != NIX_TXSCH_LVL_TL1) + rvu_write64(rvu, blkaddr, pir_off, pir_val); + } else { + rvu_write64(rvu, blkaddr, cir_off, 0x0); + if (lvl != NIX_TXSCH_LVL_TL1) + rvu_write64(rvu, blkaddr, pir_off, 0x0); + } + } +} + static int nix_smq_flush(struct rvu *rvu, int blkaddr, int smq, u16 pcifunc, int nixlf) { + struct nix_smq_flush_ctx *smq_flush_ctx; int pf = rvu_get_pf(pcifunc); u8 cgx_id = 0, lmac_id = 0; int err, restore_tx_en = 0; @@ -2087,6 +2266,14 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, lmac_id, true); } + /* XOFF all TL2s whose parent TL1 matches SMQ tree TL1 */ + smq_flush_ctx = kzalloc(sizeof(*smq_flush_ctx), GFP_KERNEL); + if (!smq_flush_ctx) + return -ENOMEM; + nix_smq_flush_fill_ctx(rvu, blkaddr, smq, smq_flush_ctx); + nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, true); + nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, false); + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq)); /* Do SMQ flush and set enqueue xoff */ cfg |= BIT_ULL(50) | BIT_ULL(49); @@ -2101,8 +2288,14 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, err = rvu_poll_reg(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), BIT_ULL(49), true); if (err) - dev_err(rvu->dev, - "NIXLF%d: SMQ%d flush failed\n", nixlf, smq); + dev_info(rvu->dev, + "NIXLF%d: SMQ%d flush failed, txlink might be busy\n", + nixlf, smq); + + /* clear XOFF on TL2s */ + nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, true); + nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, false); + kfree(smq_flush_ctx); rvu_cgx_enadis_rx_bp(rvu, pf, true); /* restore cgx tx state */ @@ -2144,6 +2337,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc) continue; nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq); nix_clear_tx_xoff(rvu, blkaddr, lvl, schq); + nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq); } } nix_clear_tx_xoff(rvu, blkaddr, NIX_TXSCH_LVL_TL1, @@ -2182,6 +2376,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc) for (schq = 0; schq < txsch->schq.max; schq++) { if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc) continue; + nix_reset_tx_schedule(rvu, blkaddr, lvl, schq); rvu_free_rsrc(&txsch->schq, schq); txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE); } @@ -2241,6 +2436,9 @@ static int nix_txschq_free_one(struct rvu *rvu, */ nix_clear_tx_xoff(rvu, blkaddr, lvl, schq); + nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq); + nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq); + /* Flush if it is a SMQ. Onus of disabling * TL2/3 queue links before SMQ flush is on user */ @@ -2250,6 +2448,8 @@ static int nix_txschq_free_one(struct rvu *rvu, goto err; } + nix_reset_tx_schedule(rvu, blkaddr, lvl, schq); + /* Free the resource */ rvu_free_rsrc(&txsch->schq, schq); txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE); @@ -2404,17 +2604,19 @@ static int nix_txschq_cfg_read(struct rvu *rvu, struct nix_hw *nix_hw, return 0; } -static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, - u16 pcifunc, struct nix_txsch *txsch) +void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, u16 pcifunc, + struct nix_txsch *txsch, bool enable) { struct rvu_hwinfo *hw = rvu->hw; int lbk_link_start, lbk_links; u8 pf = rvu_get_pf(pcifunc); int schq; + u64 cfg; if (!is_pf_cgxmapped(rvu, pf)) return; + cfg = enable ? (BIT_ULL(12) | RVU_SWITCH_LBK_CHAN) : 0; lbk_link_start = hw->cgx_links; for (schq = 0; schq < txsch->schq.max; schq++) { @@ -2428,8 +2630,7 @@ static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, rvu_write64(rvu, blkaddr, NIX_AF_TL3_TL2X_LINKX_CFG(schq, lbk_link_start + - lbk_links), - BIT_ULL(12) | RVU_SWITCH_LBK_CHAN); + lbk_links), cfg); } } @@ -2535,8 +2736,6 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu, rvu_write64(rvu, blkaddr, reg, regval); } - rvu_nix_tx_tl2_cfg(rvu, blkaddr, pcifunc, - &nix_hw->txsch[NIX_TXSCH_LVL_TL2]); return 0; } @@ -3147,10 +3346,16 @@ static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr) } /* Setup a default value of 8192 as DWRR MTU */ - if (rvu->hw->cap.nix_common_dwrr_mtu) { - rvu_write64(rvu, blkaddr, NIX_AF_DWRR_RPM_MTU, + if (rvu->hw->cap.nix_common_dwrr_mtu || + rvu->hw->cap.nix_multiple_dwrr_mtu) { + rvu_write64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_RPM), convert_bytes_to_dwrr_mtu(8192)); - rvu_write64(rvu, blkaddr, NIX_AF_DWRR_SDP_MTU, + rvu_write64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_LBK), + convert_bytes_to_dwrr_mtu(8192)); + rvu_write64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_SDP), convert_bytes_to_dwrr_mtu(8192)); } @@ -3248,19 +3453,28 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req, rsp->min_mtu = NIC_HW_MIN_FRS; - if (!rvu->hw->cap.nix_common_dwrr_mtu) { + if (!rvu->hw->cap.nix_common_dwrr_mtu && + !rvu->hw->cap.nix_multiple_dwrr_mtu) { /* Return '1' on OTx2 */ rsp->rpm_dwrr_mtu = 1; rsp->sdp_dwrr_mtu = 1; + rsp->lbk_dwrr_mtu = 1; return 0; } - dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU); + /* Return DWRR_MTU for TLx_SCHEDULE[RR_WEIGHT] config */ + dwrr_mtu = rvu_read64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_RPM)); rsp->rpm_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu); - dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_SDP_MTU); + dwrr_mtu = rvu_read64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_SDP)); rsp->sdp_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu); + dwrr_mtu = rvu_read64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_LBK)); + rsp->lbk_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu); + return 0; } @@ -3309,6 +3523,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) struct nix_rx_flowkey_alg *field; struct nix_rx_flowkey_alg tmp; u32 key_type, valid_key; + u32 l3_l4_src_dst; int l4_key_offset = 0; if (!alg) @@ -3336,6 +3551,15 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) * group_member - Enabled when protocol is part of a group. */ + /* Last 4 bits (31:28) are reserved to specify SRC, DST + * selection for L3, L4 i.e IPV[4,6]_SRC, IPV[4,6]_DST, + * [TCP,UDP,SCTP]_SRC, [TCP,UDP,SCTP]_DST + * 31 => L3_SRC, 30 => L3_DST, 29 => L4_SRC, 28 => L4_DST + */ + l3_l4_src_dst = flow_cfg; + /* Reset these 4 bits, so that these won't be part of key */ + flow_cfg &= NIX_FLOW_KEY_TYPE_L3_L4_MASK; + keyoff_marker = 0; max_key_off = 0; group_member = 0; nr_field = 0; key_off = 0; field_marker = 1; field = &tmp; max_bit_pos = fls(flow_cfg); @@ -3373,6 +3597,22 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) } field->hdr_offset = 12; /* SIP offset */ field->bytesm1 = 7; /* SIP + DIP, 8 bytes */ + + /* Only SIP */ + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L3_SRC_ONLY) + field->bytesm1 = 3; /* SIP, 4 bytes */ + + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L3_DST_ONLY) { + /* Both SIP + DIP */ + if (field->bytesm1 == 3) { + field->bytesm1 = 7; /* SIP + DIP, 8B */ + } else { + /* Only DIP */ + field->hdr_offset = 16; /* DIP off */ + field->bytesm1 = 3; /* DIP, 4 bytes */ + } + } + field->ltype_mask = 0xF; /* Match only IPv4 */ keyoff_marker = false; break; @@ -3386,6 +3626,22 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) } field->hdr_offset = 8; /* SIP offset */ field->bytesm1 = 31; /* SIP + DIP, 32 bytes */ + + /* Only SIP */ + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L3_SRC_ONLY) + field->bytesm1 = 15; /* SIP, 16 bytes */ + + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L3_DST_ONLY) { + /* Both SIP + DIP */ + if (field->bytesm1 == 15) { + /* SIP + DIP, 32 bytes */ + field->bytesm1 = 31; + } else { + /* Only DIP */ + field->hdr_offset = 24; /* DIP off */ + field->bytesm1 = 15; /* DIP,16 bytes */ + } + } field->ltype_mask = 0xF; /* Match only IPv6 */ break; case NIX_FLOW_KEY_TYPE_TCP: @@ -3401,6 +3657,21 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->lid = NPC_LID_LH; field->bytesm1 = 3; /* Sport + Dport, 4 bytes */ + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L4_SRC_ONLY) + field->bytesm1 = 1; /* SRC, 2 bytes */ + + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L4_DST_ONLY) { + /* Both SRC + DST */ + if (field->bytesm1 == 1) { + /* SRC + DST, 4 bytes */ + field->bytesm1 = 3; + } else { + /* Only DIP */ + field->hdr_offset = 2; /* DST off */ + field->bytesm1 = 1; /* DST, 2 bytes */ + } + } + /* Enum values for NPC_LID_LD and NPC_LID_LG are same, * so no need to change the ltype_match, just change * the lid for inner protocols @@ -3815,21 +4086,14 @@ int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req, } /* install/uninstall promisc entry */ - if (promisc) { + if (promisc) rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, pfvf->rx_chan_base, pfvf->rx_chan_cnt); - - if (rvu_npc_exact_has_match_table(rvu)) - rvu_npc_exact_promisc_enable(rvu, pcifunc); - } else { + else if (!nix_rx_multicast) rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf, false); - if (rvu_npc_exact_has_match_table(rvu)) - rvu_npc_exact_promisc_disable(rvu, pcifunc); - } - return 0; } @@ -4023,9 +4287,10 @@ rx_frscfg: if (link < 0) return NIX_AF_ERR_RX_LINK_INVALID; - nix_find_link_frs(rvu, req, pcifunc); linkcfg: + nix_find_link_frs(rvu, req, pcifunc); + cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link)); cfg = (cfg & ~(0xFFFFULL << 16)) | ((u64)req->maxlen << 16); if (req->update_minlen) @@ -4069,6 +4334,11 @@ int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req, else cfg &= ~BIT_ULL(40); + if (req->len_verify & NIX_RX_DROP_RE) + cfg |= BIT_ULL(32); + else + cfg &= ~BIT_ULL(32); + if (req->csum_verify & BIT(0)) cfg |= BIT_ULL(37); else @@ -4266,8 +4536,11 @@ static void rvu_nix_setup_capabilities(struct rvu *rvu, int blkaddr) * Check if HW uses a common MTU for all DWRR quantum configs. * On OcteonTx2 this register field is '0'. */ - if (((hw_const >> 56) & 0x10) == 0x10) + if ((((hw_const >> 56) & 0x10) == 0x10) && !(hw_const & BIT_ULL(61))) hw->cap.nix_common_dwrr_mtu = true; + + if (hw_const & BIT_ULL(61)) + hw->cap.nix_multiple_dwrr_mtu = true; } static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index 9f11c1e40737..7e20282c12d0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -218,13 +218,54 @@ void npc_config_secret_key(struct rvu *rvu, int blkaddr) void npc_program_mkex_hash(struct rvu *rvu, int blkaddr) { + struct npc_mcam_kex_hash *mh = rvu->kpu.mkex_hash; struct hw_cap *hwcap = &rvu->hw->cap; + u8 intf, ld, hdr_offset, byte_len; struct rvu_hwinfo *hw = rvu->hw; - u8 intf; + u64 cfg; + /* Check if hardware supports hash extraction */ if (!hwcap->npc_hash_extract) return; + /* Check if IPv6 source/destination address + * should be hash enabled. + * Hashing reduces 128bit SIP/DIP fields to 32bit + * so that 224 bit X2 key can be used for IPv6 based filters as well, + * which in turn results in more number of MCAM entries available for + * use. + * + * Hashing of IPV6 SIP/DIP is enabled in below scenarios + * 1. If the silicon variant supports hashing feature + * 2. If the number of bytes of IP addr being extracted is 4 bytes ie + * 32bit. The assumption here is that if user wants 8bytes of LSB of + * IP addr or full 16 bytes then his intention is not to use 32bit + * hash. + */ + for (intf = 0; intf < hw->npc_intfs; intf++) { + for (ld = 0; ld < NPC_MAX_LD; ld++) { + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_INTFX_LIDX_LTX_LDX_CFG(intf, + NPC_LID_LC, + NPC_LT_LC_IP6, + ld)); + hdr_offset = FIELD_GET(NPC_HDR_OFFSET, cfg); + byte_len = FIELD_GET(NPC_BYTESM, cfg); + /* Hashing of IPv6 source/destination address should be + * enabled if, + * hdr_offset == 8 (offset of source IPv6 address) or + * hdr_offset == 24 (offset of destination IPv6) + * address) and the number of byte to be + * extracted is 4. As per hardware configuration + * byte_len should be == actual byte_len - 1. + * Hence byte_len is checked against 3 but nor 4. + */ + if ((hdr_offset == 8 || hdr_offset == 24) && byte_len == 3) + mh->lid_lt_ld_hash_en[intf][NPC_LID_LC][NPC_LT_LC_IP6][ld] = true; + } + } + + /* Update hash configuration if the field is hash enabled */ for (intf = 0; intf < hw->npc_intfs; intf++) { npc_program_mkex_hash_rx(rvu, blkaddr, intf); npc_program_mkex_hash_tx(rvu, blkaddr, intf); @@ -1164,8 +1205,10 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i { struct npc_exact_table *table; u16 *cnt, old_cnt; + bool promisc; table = rvu->hw->table; + promisc = table->promisc_mode[drop_mcam_idx]; cnt = &table->cnt_cmd_rules[drop_mcam_idx]; old_cnt = *cnt; @@ -1177,13 +1220,18 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i *enable_or_disable_cam = false; - /* If all rules are deleted, disable cam */ + if (promisc) + goto done; + + /* If all rules are deleted and not already in promisc mode; + * disable cam + */ if (!*cnt && val < 0) { *enable_or_disable_cam = true; goto done; } - /* If rule got added, enable cam */ + /* If rule got added and not already in promisc mode; enable cam */ if (!old_cnt && val > 0) { *enable_or_disable_cam = true; goto done; @@ -1462,6 +1510,12 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc) *promisc = false; mutex_unlock(&table->lock); + /* Enable drop rule */ + rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, + true); + + dev_dbg(rvu->dev, "%s: disabled promisc mode (cgx=%d lmac=%d)\n", + __func__, cgx_id, lmac_id); return 0; } @@ -1503,6 +1557,12 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc) *promisc = true; mutex_unlock(&table->lock); + /* disable drop rule */ + rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, + false); + + dev_dbg(rvu->dev, "%s: Enabled promisc mode (cgx=%d lmac=%d)\n", + __func__, cgx_id, lmac_id); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h index a1c3d987b804..57a09328d46b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h @@ -70,8 +70,8 @@ static struct npc_mcam_kex_hash npc_mkex_hash_default __maybe_unused = { [NIX_INTF_RX] = { [NPC_LID_LC] = { [NPC_LT_LC_IP6] = { - true, - true, + false, + false, }, }, }, @@ -79,8 +79,8 @@ static struct npc_mcam_kex_hash npc_mkex_hash_default __maybe_unused = { [NIX_INTF_TX] = { [NPC_LID_LC] = { [NPC_LT_LC_IP6] = { - true, - true, + false, + false, }, }, }, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 7007f0b8e659..b42e631e52d0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -272,7 +272,8 @@ #define NIX_AF_DEBUG_NPC_RESP_DATAX(a) (0x680 | (a) << 3) #define NIX_AF_SMQX_CFG(a) (0x700 | (a) << 16) #define NIX_AF_SQM_DBG_CTL_STATUS (0x750) -#define NIX_AF_DWRR_SDP_MTU (0x790) +#define NIX_AF_DWRR_SDP_MTU (0x790) /* All CN10K except CN10KB */ +#define NIX_AF_DWRR_MTUX(a) (0x790 | (a) << 16) /* Only for CN10KB */ #define NIX_AF_DWRR_RPM_MTU (0x7A0) #define NIX_AF_PSE_CHANNEL_LEVEL (0x800) #define NIX_AF_PSE_SHAPER_CFG (0x810) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c index 3392487f6b47..592b317f4637 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c @@ -8,6 +8,17 @@ #include <linux/bitfield.h> #include "rvu.h" +static void rvu_switch_enable_lbk_link(struct rvu *rvu, u16 pcifunc, bool enable) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); + struct nix_hw *nix_hw; + + nix_hw = get_nix_hw(rvu->hw, pfvf->nix_blkaddr); + /* Enable LBK links with channel 63 for TX MCAM rule */ + rvu_nix_tx_tl2_cfg(rvu, pfvf->nix_blkaddr, pcifunc, + &nix_hw->txsch[NIX_TXSCH_LVL_TL2], enable); +} + static int rvu_switch_install_rx_rule(struct rvu *rvu, u16 pcifunc, u16 chan_mask) { @@ -52,6 +63,8 @@ static int rvu_switch_install_tx_rule(struct rvu *rvu, u16 pcifunc, u16 entry) if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags)) return 0; + rvu_switch_enable_lbk_link(rvu, pcifunc, true); + lbkid = pfvf->nix_blkaddr == BLKADDR_NIX0 ? 0 : 1; ether_addr_copy(req.packet.dmac, pfvf->mac_addr); eth_broadcast_addr((u8 *)&req.mask.dmac); @@ -218,6 +231,9 @@ void rvu_switch_disable(struct rvu *rvu) "Reverting RX rule for PF%d failed(%d)\n", pf, err); + /* Disable LBK link */ + rvu_switch_enable_lbk_link(rvu, pcifunc, false); + rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL); for (vf = 0; vf < numvfs; vf++) { pcifunc = pf << 10 | ((vf + 1) & 0x3FF); @@ -226,6 +242,8 @@ void rvu_switch_disable(struct rvu *rvu) dev_err(rvu->dev, "Reverting RX rule for PF%dVF%d failed(%d)\n", pf, vf, err); + + rvu_switch_enable_lbk_link(rvu, pcifunc, false); } } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 73fdb8798614..5664f768cb0c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o otx2_ptp.o rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \ - otx2_devlink.o + otx2_devlink.o qos_sq.o qos.o rvu_nicvf-y := otx2_vf.o otx2_devlink.o rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c index 826f691de259..a4a258da8dd5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -107,12 +107,13 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) } #define NPA_MAX_BURST 16 -void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) +int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) { struct otx2_nic *pfvf = dev; + int cnt = cq->pool_ptrs; u64 ptrs[NPA_MAX_BURST]; - int num_ptrs = 1; dma_addr_t bufptr; + int num_ptrs = 1; /* Refill pool with new buffers */ while (cq->pool_ptrs) { @@ -131,6 +132,7 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) num_ptrs = 1; } } + return cnt - cq->pool_ptrs; } void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h index 8ae96815865e..c1861f7de254 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h @@ -24,7 +24,7 @@ static inline int mtu_to_dwrr_weight(struct otx2_nic *pfvf, int mtu) return weight; } -void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); +int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx); int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); int cn10k_lmtst_init(struct otx2_nic *pfvf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index a487a98eac88..59b138214af2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -4,9 +4,9 @@ * Copyright (C) 2022 Marvell. */ +#include <crypto/skcipher.h> #include <linux/rtnetlink.h> #include <linux/bitfield.h> -#include <net/macsec.h> #include "otx2_common.h" #define MCS_TCAM0_MAC_DA_MASK GENMASK_ULL(47, 0) @@ -43,6 +43,56 @@ #define MCS_TCI_E 0x08 /* encryption */ #define MCS_TCI_C 0x04 /* changed text */ +#define CN10K_MAX_HASH_LEN 16 +#define CN10K_MAX_SAK_LEN 32 + +static int cn10k_ecb_aes_encrypt(struct otx2_nic *pfvf, u8 *sak, + u16 sak_len, u8 *hash) +{ + u8 data[CN10K_MAX_HASH_LEN] = { 0 }; + struct skcipher_request *req = NULL; + struct scatterlist sg_src, sg_dst; + struct crypto_skcipher *tfm; + DECLARE_CRYPTO_WAIT(wait); + int err; + + tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0); + if (IS_ERR(tfm)) { + dev_err(pfvf->dev, "failed to allocate transform for ecb-aes\n"); + return PTR_ERR(tfm); + } + + req = skcipher_request_alloc(tfm, GFP_KERNEL); + if (!req) { + dev_err(pfvf->dev, "failed to allocate request for skcipher\n"); + err = -ENOMEM; + goto free_tfm; + } + + err = crypto_skcipher_setkey(tfm, sak, sak_len); + if (err) { + dev_err(pfvf->dev, "failed to set key for skcipher\n"); + goto free_req; + } + + /* build sg list */ + sg_init_one(&sg_src, data, CN10K_MAX_HASH_LEN); + sg_init_one(&sg_dst, hash, CN10K_MAX_HASH_LEN); + + skcipher_request_set_callback(req, 0, crypto_req_done, &wait); + skcipher_request_set_crypt(req, &sg_src, &sg_dst, + CN10K_MAX_HASH_LEN, NULL); + + err = crypto_skcipher_encrypt(req); + err = crypto_wait_req(err, &wait); + +free_req: + skcipher_request_free(req); +free_tfm: + crypto_free_skcipher(tfm); + return err; +} + static struct cn10k_mcs_txsc *cn10k_mcs_get_txsc(struct cn10k_mcs_cfg *cfg, struct macsec_secy *secy) { @@ -212,6 +262,7 @@ static int cn10k_mcs_write_rx_secy(struct otx2_nic *pfvf, struct mcs_secy_plcy_write_req *req; struct mbox *mbox = &pfvf->mbox; u64 policy; + u8 cipher; int ret; mutex_lock(&mbox->lock); @@ -227,7 +278,21 @@ static int cn10k_mcs_write_rx_secy(struct otx2_nic *pfvf, policy |= MCS_RX_SECY_PLCY_RP; policy |= MCS_RX_SECY_PLCY_AUTH_ENA; - policy |= FIELD_PREP(MCS_RX_SECY_PLCY_CIP, MCS_GCM_AES_128); + + switch (secy->key_len) { + case 16: + cipher = secy->xpn ? MCS_GCM_AES_XPN_128 : MCS_GCM_AES_128; + break; + case 32: + cipher = secy->xpn ? MCS_GCM_AES_XPN_256 : MCS_GCM_AES_256; + break; + default: + cipher = MCS_GCM_AES_128; + dev_warn(pfvf->dev, "Unsupported key length\n"); + break; + } + + policy |= FIELD_PREP(MCS_RX_SECY_PLCY_CIP, cipher); policy |= FIELD_PREP(MCS_RX_SECY_PLCY_VAL, secy->validate_frames); policy |= MCS_RX_SECY_PLCY_ENA; @@ -316,16 +381,53 @@ fail: return ret; } +static int cn10k_mcs_write_keys(struct otx2_nic *pfvf, + struct macsec_secy *secy, + struct mcs_sa_plcy_write_req *req, + u8 *sak, u8 *salt, ssci_t ssci) +{ + u8 hash_rev[CN10K_MAX_HASH_LEN]; + u8 sak_rev[CN10K_MAX_SAK_LEN]; + u8 salt_rev[MACSEC_SALT_LEN]; + u8 hash[CN10K_MAX_HASH_LEN]; + u32 ssci_63_32; + int err, i; + + err = cn10k_ecb_aes_encrypt(pfvf, sak, secy->key_len, hash); + if (err) { + dev_err(pfvf->dev, "Generating hash using ECB(AES) failed\n"); + return err; + } + + for (i = 0; i < secy->key_len; i++) + sak_rev[i] = sak[secy->key_len - 1 - i]; + + for (i = 0; i < CN10K_MAX_HASH_LEN; i++) + hash_rev[i] = hash[CN10K_MAX_HASH_LEN - 1 - i]; + + for (i = 0; i < MACSEC_SALT_LEN; i++) + salt_rev[i] = salt[MACSEC_SALT_LEN - 1 - i]; + + ssci_63_32 = (__force u32)cpu_to_be32((__force u32)ssci); + + memcpy(&req->plcy[0][0], sak_rev, secy->key_len); + memcpy(&req->plcy[0][4], hash_rev, CN10K_MAX_HASH_LEN); + memcpy(&req->plcy[0][6], salt_rev, MACSEC_SALT_LEN); + req->plcy[0][7] |= (u64)ssci_63_32 << 32; + + return 0; +} + static int cn10k_mcs_write_rx_sa_plcy(struct otx2_nic *pfvf, struct macsec_secy *secy, struct cn10k_mcs_rxsc *rxsc, u8 assoc_num, bool sa_in_use) { - unsigned char *src = rxsc->sa_key[assoc_num]; struct mcs_sa_plcy_write_req *plcy_req; + u8 *sak = rxsc->sa_key[assoc_num]; + u8 *salt = rxsc->salt[assoc_num]; struct mcs_rx_sc_sa_map *map_req; struct mbox *mbox = &pfvf->mbox; - u8 reg, key_len; int ret; mutex_lock(&mbox->lock); @@ -343,11 +445,10 @@ static int cn10k_mcs_write_rx_sa_plcy(struct otx2_nic *pfvf, goto fail; } - for (reg = 0, key_len = 0; key_len < secy->key_len; key_len += 8) { - memcpy((u8 *)&plcy_req->plcy[0][reg], - (src + reg * 8), 8); - reg++; - } + ret = cn10k_mcs_write_keys(pfvf, secy, plcy_req, sak, + salt, rxsc->ssci[assoc_num]); + if (ret) + goto fail; plcy_req->sa_index[0] = rxsc->hw_sa_id[assoc_num]; plcy_req->sa_cnt = 1; @@ -400,12 +501,16 @@ static int cn10k_mcs_write_tx_secy(struct otx2_nic *pfvf, struct mcs_secy_plcy_write_req *req; struct mbox *mbox = &pfvf->mbox; struct macsec_tx_sc *sw_tx_sc; - /* Insert SecTag after 12 bytes (DA+SA)*/ - u8 tag_offset = 12; u8 sectag_tci = 0; + u8 tag_offset; u64 policy; + u8 cipher; int ret; + /* Insert SecTag after 12 bytes (DA+SA) or 16 bytes + * if VLAN tag needs to be sent in clear text. + */ + tag_offset = txsc->vlan_dev ? 16 : 12; sw_tx_sc = &secy->tx_sc; mutex_lock(&mbox->lock); @@ -434,7 +539,21 @@ static int cn10k_mcs_write_tx_secy(struct otx2_nic *pfvf, policy |= FIELD_PREP(MCS_TX_SECY_PLCY_ST_OFFSET, tag_offset); policy |= MCS_TX_SECY_PLCY_INS_MODE; policy |= MCS_TX_SECY_PLCY_AUTH_ENA; - policy |= FIELD_PREP(MCS_TX_SECY_PLCY_CIP, MCS_GCM_AES_128); + + switch (secy->key_len) { + case 16: + cipher = secy->xpn ? MCS_GCM_AES_XPN_128 : MCS_GCM_AES_128; + break; + case 32: + cipher = secy->xpn ? MCS_GCM_AES_XPN_256 : MCS_GCM_AES_256; + break; + default: + cipher = MCS_GCM_AES_128; + dev_warn(pfvf->dev, "Unsupported key length\n"); + break; + } + + policy |= FIELD_PREP(MCS_TX_SECY_PLCY_CIP, cipher); if (secy->protect_frames) policy |= MCS_TX_SECY_PLCY_PROTECT; @@ -542,10 +661,10 @@ static int cn10k_mcs_write_tx_sa_plcy(struct otx2_nic *pfvf, struct cn10k_mcs_txsc *txsc, u8 assoc_num) { - unsigned char *src = txsc->sa_key[assoc_num]; struct mcs_sa_plcy_write_req *plcy_req; + u8 *sak = txsc->sa_key[assoc_num]; + u8 *salt = txsc->salt[assoc_num]; struct mbox *mbox = &pfvf->mbox; - u8 reg, key_len; int ret; mutex_lock(&mbox->lock); @@ -556,10 +675,10 @@ static int cn10k_mcs_write_tx_sa_plcy(struct otx2_nic *pfvf, goto fail; } - for (reg = 0, key_len = 0; key_len < secy->key_len; key_len += 8) { - memcpy((u8 *)&plcy_req->plcy[0][reg], (src + reg * 8), 8); - reg++; - } + ret = cn10k_mcs_write_keys(pfvf, secy, plcy_req, sak, + salt, txsc->ssci[assoc_num]); + if (ret) + goto fail; plcy_req->plcy[0][8] = assoc_num; plcy_req->sa_index[0] = txsc->hw_sa_id[assoc_num]; @@ -922,8 +1041,7 @@ static int cn10k_mcs_secy_tx_cfg(struct otx2_nic *pfvf, struct macsec_secy *secy { if (sw_tx_sa) { cn10k_mcs_write_tx_sa_plcy(pfvf, secy, txsc, sa_num); - cn10k_write_tx_sa_pn(pfvf, txsc, sa_num, - sw_tx_sa->next_pn_halves.lower); + cn10k_write_tx_sa_pn(pfvf, txsc, sa_num, sw_tx_sa->next_pn); cn10k_mcs_link_tx_sa2sc(pfvf, secy, txsc, sa_num, sw_tx_sa->active); } @@ -959,7 +1077,7 @@ static int cn10k_mcs_secy_rx_cfg(struct otx2_nic *pfvf, cn10k_mcs_write_rx_sa_plcy(pfvf, secy, mcs_rx_sc, sa_num, sw_rx_sa->active); cn10k_mcs_write_rx_sa_pn(pfvf, mcs_rx_sc, sa_num, - sw_rx_sa->next_pn_halves.lower); + sw_rx_sa->next_pn); } cn10k_mcs_write_rx_flowid(pfvf, mcs_rx_sc, hw_secy_id); @@ -1053,7 +1171,7 @@ static void cn10k_mcs_sync_stats(struct otx2_nic *pfvf, struct macsec_secy *secy static int cn10k_mdo_open(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; struct macsec_tx_sa *sw_tx_sa; @@ -1077,7 +1195,7 @@ static int cn10k_mdo_open(struct macsec_context *ctx) static int cn10k_mdo_stop(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct cn10k_mcs_txsc *txsc; int err; @@ -1095,7 +1213,7 @@ static int cn10k_mdo_stop(struct macsec_context *ctx) static int cn10k_mdo_add_secy(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; struct cn10k_mcs_txsc *txsc; @@ -1103,13 +1221,6 @@ static int cn10k_mdo_add_secy(struct macsec_context *ctx) if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) return -EOPNOTSUPP; - /* Stick to 16 bytes key len until XPN support is added */ - if (secy->key_len != 16) - return -EOPNOTSUPP; - - if (secy->xpn) - return -EOPNOTSUPP; - txsc = cn10k_mcs_create_txsc(pfvf); if (IS_ERR(txsc)) return -ENOSPC; @@ -1118,6 +1229,7 @@ static int cn10k_mdo_add_secy(struct macsec_context *ctx) txsc->encoding_sa = secy->tx_sc.encoding_sa; txsc->last_validate_frames = secy->validate_frames; txsc->last_replay_protect = secy->replay_protect; + txsc->vlan_dev = is_vlan_dev(ctx->netdev); list_add(&txsc->entry, &cfg->txsc_list); @@ -1129,7 +1241,7 @@ static int cn10k_mdo_add_secy(struct macsec_context *ctx) static int cn10k_mdo_upd_secy(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; struct macsec_tx_sa *sw_tx_sa; @@ -1164,7 +1276,7 @@ static int cn10k_mdo_upd_secy(struct macsec_context *ctx) static int cn10k_mdo_del_secy(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct cn10k_mcs_txsc *txsc; @@ -1183,7 +1295,7 @@ static int cn10k_mdo_del_secy(struct macsec_context *ctx) static int cn10k_mdo_add_txsa(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct macsec_tx_sa *sw_tx_sa = ctx->sa.tx_sa; struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; @@ -1202,6 +1314,9 @@ static int cn10k_mdo_add_txsa(struct macsec_context *ctx) return -ENOSPC; memcpy(&txsc->sa_key[sa_num], ctx->sa.key, secy->key_len); + memcpy(&txsc->salt[sa_num], sw_tx_sa->key.salt.bytes, MACSEC_SALT_LEN); + txsc->ssci[sa_num] = sw_tx_sa->ssci; + txsc->sa_bmap |= 1 << sa_num; if (netif_running(secy->netdev)) { @@ -1210,7 +1325,7 @@ static int cn10k_mdo_add_txsa(struct macsec_context *ctx) return err; err = cn10k_write_tx_sa_pn(pfvf, txsc, sa_num, - sw_tx_sa->next_pn_halves.lower); + sw_tx_sa->next_pn); if (err) return err; @@ -1225,7 +1340,7 @@ static int cn10k_mdo_add_txsa(struct macsec_context *ctx) static int cn10k_mdo_upd_txsa(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct macsec_tx_sa *sw_tx_sa = ctx->sa.tx_sa; struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; @@ -1243,7 +1358,7 @@ static int cn10k_mdo_upd_txsa(struct macsec_context *ctx) if (netif_running(secy->netdev)) { /* Keys cannot be changed after creation */ err = cn10k_write_tx_sa_pn(pfvf, txsc, sa_num, - sw_tx_sa->next_pn_halves.lower); + sw_tx_sa->next_pn); if (err) return err; @@ -1258,7 +1373,7 @@ static int cn10k_mdo_upd_txsa(struct macsec_context *ctx) static int cn10k_mdo_del_txsa(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; u8 sa_num = ctx->sa.assoc_num; struct cn10k_mcs_txsc *txsc; @@ -1278,7 +1393,7 @@ static int cn10k_mdo_del_txsa(struct macsec_context *ctx) static int cn10k_mdo_add_rxsc(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; struct cn10k_mcs_rxsc *rxsc; @@ -1312,7 +1427,7 @@ static int cn10k_mdo_add_rxsc(struct macsec_context *ctx) static int cn10k_mdo_upd_rxsc(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; bool enable = ctx->rx_sc->active; @@ -1331,7 +1446,7 @@ static int cn10k_mdo_upd_rxsc(struct macsec_context *ctx) static int cn10k_mdo_del_rxsc(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct cn10k_mcs_rxsc *rxsc; @@ -1349,11 +1464,10 @@ static int cn10k_mdo_del_rxsc(struct macsec_context *ctx) static int cn10k_mdo_add_rxsa(struct macsec_context *ctx) { + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc; - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_rx_sa *rx_sa = ctx->sa.rx_sa; - u64 next_pn = rx_sa->next_pn_halves.lower; struct macsec_secy *secy = ctx->secy; bool sa_in_use = rx_sa->active; u8 sa_num = ctx->sa.assoc_num; @@ -1371,6 +1485,9 @@ static int cn10k_mdo_add_rxsa(struct macsec_context *ctx) return -ENOSPC; memcpy(&rxsc->sa_key[sa_num], ctx->sa.key, ctx->secy->key_len); + memcpy(&rxsc->salt[sa_num], rx_sa->key.salt.bytes, MACSEC_SALT_LEN); + rxsc->ssci[sa_num] = rx_sa->ssci; + rxsc->sa_bmap |= 1 << sa_num; if (netif_running(secy->netdev)) { @@ -1379,7 +1496,8 @@ static int cn10k_mdo_add_rxsa(struct macsec_context *ctx) if (err) return err; - err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num, next_pn); + err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num, + rx_sa->next_pn); if (err) return err; } @@ -1389,11 +1507,10 @@ static int cn10k_mdo_add_rxsa(struct macsec_context *ctx) static int cn10k_mdo_upd_rxsa(struct macsec_context *ctx) { + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc; - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_rx_sa *rx_sa = ctx->sa.rx_sa; - u64 next_pn = rx_sa->next_pn_halves.lower; struct macsec_secy *secy = ctx->secy; bool sa_in_use = rx_sa->active; u8 sa_num = ctx->sa.assoc_num; @@ -1412,7 +1529,8 @@ static int cn10k_mdo_upd_rxsa(struct macsec_context *ctx) if (err) return err; - err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num, next_pn); + err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num, + rx_sa->next_pn); if (err) return err; } @@ -1422,8 +1540,8 @@ static int cn10k_mdo_upd_rxsa(struct macsec_context *ctx) static int cn10k_mdo_del_rxsa(struct macsec_context *ctx) { + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc; - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; u8 sa_num = ctx->sa.assoc_num; struct cn10k_mcs_rxsc *rxsc; @@ -1445,8 +1563,8 @@ static int cn10k_mdo_del_rxsa(struct macsec_context *ctx) static int cn10k_mdo_get_dev_stats(struct macsec_context *ctx) { + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct mcs_secy_stats tx_rsp = { 0 }, rx_rsp = { 0 }; - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; struct cn10k_mcs_txsc *txsc; @@ -1481,7 +1599,7 @@ static int cn10k_mdo_get_dev_stats(struct macsec_context *ctx) static int cn10k_mdo_get_tx_sc_stats(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct mcs_sc_stats rsp = { 0 }; struct cn10k_mcs_txsc *txsc; @@ -1502,7 +1620,7 @@ static int cn10k_mdo_get_tx_sc_stats(struct macsec_context *ctx) static int cn10k_mdo_get_tx_sa_stats(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct mcs_sa_stats rsp = { 0 }; u8 sa_num = ctx->sa.assoc_num; @@ -1525,7 +1643,7 @@ static int cn10k_mdo_get_tx_sa_stats(struct macsec_context *ctx) static int cn10k_mdo_get_rx_sc_stats(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; struct mcs_sc_stats rsp = { 0 }; @@ -1567,8 +1685,8 @@ static int cn10k_mdo_get_rx_sc_stats(struct macsec_context *ctx) static int cn10k_mdo_get_rx_sa_stats(struct macsec_context *ctx) { + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc; - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct mcs_sa_stats rsp = { 0 }; u8 sa_num = ctx->sa.assoc_num; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 8a41ad8ca04f..20ecc90d203e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -8,6 +8,7 @@ #include <linux/interrupt.h> #include <linux/pci.h> #include <net/tso.h> +#include <linux/bitfield.h> #include "otx2_reg.h" #include "otx2_common.h" @@ -89,6 +90,11 @@ int otx2_update_sq_stats(struct otx2_nic *pfvf, int qidx) if (!pfvf->qset.sq) return 0; + if (qidx >= pfvf->hw.non_qos_queues) { + if (!test_bit(qidx - pfvf->hw.non_qos_queues, pfvf->qos.qos_sq_bmap)) + return 0; + } + otx2_nix_sq_op_stats(&sq->stats, pfvf, qidx); return 1; } @@ -513,11 +519,32 @@ void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx) (pfvf->hw.cq_ecount_wait - 1)); } -int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, - dma_addr_t *dma) +static int otx2_alloc_pool_buf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma) +{ + unsigned int offset = 0; + struct page *page; + size_t sz; + + sz = SKB_DATA_ALIGN(pool->rbsize); + sz = ALIGN(sz, OTX2_ALIGN); + + page = page_pool_alloc_frag(pool->page_pool, &offset, sz, GFP_ATOMIC); + if (unlikely(!page)) + return -ENOMEM; + + *dma = page_pool_get_dma_addr(page) + offset; + return 0; +} + +static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma) { u8 *buf; + if (pool->page_pool) + return otx2_alloc_pool_buf(pfvf, pool, dma); + buf = napi_alloc_frag_align(pool->rbsize, OTX2_ALIGN); if (unlikely(!buf)) return -ENOMEM; @@ -532,8 +559,8 @@ int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, return 0; } -static int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, - dma_addr_t *dma) +int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma) { int ret; @@ -546,20 +573,8 @@ static int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, dma_addr_t *dma) { - if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma))) { - struct refill_work *work; - struct delayed_work *dwork; - - work = &pfvf->refill_wrk[cq->cq_idx]; - dwork = &work->pool_refill_work; - /* Schedule a task if no other task is running */ - if (!cq->refill_task_sched) { - cq->refill_task_sched = true; - schedule_delayed_work(dwork, - msecs_to_jiffies(100)); - } + if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma))) return -ENOMEM; - } return 0; } @@ -616,6 +631,10 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool txschq_for req->regval[0] = ((u64)pfvf->tx_max_pktlen << 8) | OTX2_MIN_MTU; req->regval[0] |= (0x20ULL << 51) | (0x80ULL << 39) | (0x2ULL << 36); + /* Set link type for DWRR MTU selection on CN10K silicons */ + if (!is_dev_otx2(pfvf->pdev)) + req->regval[0] |= FIELD_PREP(GENMASK_ULL(58, 57), + (u64)hw->smq_link_type); req->num_regs++; /* MDQ config */ parent = schq_list[NIX_TXSCH_LVL_TL4][prio]; @@ -716,7 +735,8 @@ EXPORT_SYMBOL(otx2_smq_flush); int otx2_txsch_alloc(struct otx2_nic *pfvf) { struct nix_txsch_alloc_req *req; - int lvl; + struct nix_txsch_alloc_rsp *rsp; + int lvl, schq, rc; /* Get memory to put this msg */ req = otx2_mbox_alloc_msg_nix_txsch_alloc(&pfvf->mbox); @@ -726,43 +746,84 @@ int otx2_txsch_alloc(struct otx2_nic *pfvf) /* Request one schq per level */ for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) req->schq[lvl] = 1; + rc = otx2_sync_mbox_msg(&pfvf->mbox); + if (rc) + return rc; - return otx2_sync_mbox_msg(&pfvf->mbox); + rsp = (struct nix_txsch_alloc_rsp *) + otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr); + if (IS_ERR(rsp)) + return PTR_ERR(rsp); + + /* Setup transmit scheduler list */ + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) + for (schq = 0; schq < rsp->schq[lvl]; schq++) + pfvf->hw.txschq_list[lvl][schq] = + rsp->schq_list[lvl][schq]; + + pfvf->hw.txschq_link_cfg_lvl = rsp->link_cfg_lvl; + + return 0; } -int otx2_txschq_stop(struct otx2_nic *pfvf) +void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq) { struct nix_txsch_free_req *free_req; - int lvl, schq, err; + int err; mutex_lock(&pfvf->mbox.lock); - /* Free the transmit schedulers */ + free_req = otx2_mbox_alloc_msg_nix_txsch_free(&pfvf->mbox); if (!free_req) { mutex_unlock(&pfvf->mbox.lock); - return -ENOMEM; + netdev_err(pfvf->netdev, + "Failed alloc txschq free req\n"); + return; } - free_req->flags = TXSCHQ_FREE_ALL; + free_req->schq_lvl = lvl; + free_req->schq = schq; + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + netdev_err(pfvf->netdev, + "Failed stop txschq %d at level %d\n", schq, lvl); + } + mutex_unlock(&pfvf->mbox.lock); +} +EXPORT_SYMBOL(otx2_txschq_free_one); + +void otx2_txschq_stop(struct otx2_nic *pfvf) +{ + int lvl, schq; + + /* free non QOS TLx nodes */ + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) + otx2_txschq_free_one(pfvf, lvl, + pfvf->hw.txschq_list[lvl][0]); /* Clear the txschq list */ for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { for (schq = 0; schq < MAX_TXSCHQ_PER_FUNC; schq++) pfvf->hw.txschq_list[lvl][schq] = 0; } - return err; + } void otx2_sqb_flush(struct otx2_nic *pfvf) { int qidx, sqe_tail, sqe_head; + struct otx2_snd_queue *sq; u64 incr, *ptr, val; int timeout = 1000; ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); - for (qidx = 0; qidx < pfvf->hw.tot_tx_queues; qidx++) { + for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { + sq = &pfvf->qset.sq[qidx]; + if (!sq->sqb_ptrs) + continue; + incr = (u64)qidx << 32; while (timeout) { val = otx2_atomic64_add(incr, ptr); @@ -862,7 +923,7 @@ int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) return otx2_sync_mbox_msg(&pfvf->mbox); } -static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) +int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) { struct otx2_qset *qset = &pfvf->qset; struct otx2_snd_queue *sq; @@ -935,9 +996,17 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) cq->cint_idx = qidx - pfvf->hw.rx_queues; cq->cqe_cnt = qset->sqe_cnt; } else { - cq->cq_type = CQ_XDP; - cq->cint_idx = qidx - non_xdp_queues; - cq->cqe_cnt = qset->sqe_cnt; + if (pfvf->hw.xdp_queues && + qidx < non_xdp_queues + pfvf->hw.xdp_queues) { + cq->cq_type = CQ_XDP; + cq->cint_idx = qidx - non_xdp_queues; + cq->cqe_cnt = qset->sqe_cnt; + } else { + cq->cq_type = CQ_QOS; + cq->cint_idx = qidx - non_xdp_queues - + pfvf->hw.xdp_queues; + cq->cqe_cnt = qset->sqe_cnt; + } } cq->cqe_size = pfvf->qset.xqe_size; @@ -999,39 +1068,20 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) static void otx2_pool_refill_task(struct work_struct *work) { struct otx2_cq_queue *cq; - struct otx2_pool *rbpool; struct refill_work *wrk; - int qidx, free_ptrs = 0; struct otx2_nic *pfvf; - dma_addr_t bufptr; + int qidx; wrk = container_of(work, struct refill_work, pool_refill_work.work); pfvf = wrk->pf; qidx = wrk - pfvf->refill_wrk; cq = &pfvf->qset.cq[qidx]; - rbpool = cq->rbpool; - free_ptrs = cq->pool_ptrs; - while (cq->pool_ptrs) { - if (otx2_alloc_rbuf(pfvf, rbpool, &bufptr)) { - /* Schedule a WQ if we fails to free atleast half of the - * pointers else enable napi for this RQ. - */ - if (!((free_ptrs - cq->pool_ptrs) > free_ptrs / 2)) { - struct delayed_work *dwork; - - dwork = &wrk->pool_refill_work; - schedule_delayed_work(dwork, - msecs_to_jiffies(100)); - } else { - cq->refill_task_sched = false; - } - return; - } - pfvf->hw_ops->aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM); - cq->pool_ptrs--; - } cq->refill_task_sched = false; + + local_bh_disable(); + napi_schedule(wrk->napi); + local_bh_enable(); } int otx2_config_nix_queues(struct otx2_nic *pfvf) @@ -1048,7 +1098,7 @@ int otx2_config_nix_queues(struct otx2_nic *pfvf) } /* Initialize TX queues */ - for (qidx = 0; qidx < pfvf->hw.tot_tx_queues; qidx++) { + for (qidx = 0; qidx < pfvf->hw.non_qos_queues; qidx++) { u16 sqb_aura = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); err = otx2_sq_init(pfvf, qidx, sqb_aura); @@ -1095,7 +1145,7 @@ int otx2_config_nix(struct otx2_nic *pfvf) /* Set RQ/SQ/CQ counts */ nixlf->rq_cnt = pfvf->hw.rx_queues; - nixlf->sq_cnt = pfvf->hw.tot_tx_queues; + nixlf->sq_cnt = otx2_get_total_tx_queues(pfvf); nixlf->cq_cnt = pfvf->qset.cq_cnt; nixlf->rss_sz = MAX_RSS_INDIR_TBL_SIZE; nixlf->rss_grps = MAX_RSS_GROUPS; @@ -1133,7 +1183,7 @@ void otx2_sq_free_sqbs(struct otx2_nic *pfvf) int sqb, qidx; u64 iova, pa; - for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { + for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { sq = &qset->sq[qidx]; if (!sq->sqb_ptrs) continue; @@ -1151,10 +1201,31 @@ void otx2_sq_free_sqbs(struct otx2_nic *pfvf) } } +void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool, + u64 iova, int size) +{ + struct page *page; + u64 pa; + + pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); + page = virt_to_head_page(phys_to_virt(pa)); + + if (pool->page_pool) { + page_pool_put_full_page(pool->page_pool, page, true); + } else { + dma_unmap_page_attrs(pfvf->dev, iova, size, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + + put_page(page); + } +} + void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type) { int pool_id, pool_start = 0, pool_end = 0, size = 0; - u64 iova, pa; + struct otx2_pool *pool; + u64 iova; if (type == AURA_NIX_SQ) { pool_start = otx2_get_pool_idx(pfvf, type, 0); @@ -1170,15 +1241,13 @@ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type) /* Free SQB and RQB pointers from the aura pool */ for (pool_id = pool_start; pool_id < pool_end; pool_id++) { iova = otx2_aura_allocptr(pfvf, pool_id); + pool = &pfvf->qset.pool[pool_id]; while (iova) { if (type == AURA_NIX_RQ) iova -= OTX2_HEAD_ROOM; - pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); - dma_unmap_page_attrs(pfvf->dev, iova, size, - DMA_FROM_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); - put_page(virt_to_page(phys_to_virt(pa))); + otx2_free_bufs(pfvf, pool, iova, size); + iova = otx2_aura_allocptr(pfvf, pool_id); } } @@ -1196,13 +1265,15 @@ void otx2_aura_pool_free(struct otx2_nic *pfvf) pool = &pfvf->qset.pool[pool_id]; qmem_free(pfvf->dev, pool->stack); qmem_free(pfvf->dev, pool->fc_addr); + page_pool_destroy(pool->page_pool); + pool->page_pool = NULL; } devm_kfree(pfvf->dev, pfvf->qset.pool); pfvf->qset.pool = NULL; } -static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, - int pool_id, int numptrs) +int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs) { struct npa_aq_enq_req *aq; struct otx2_pool *pool; @@ -1278,9 +1349,10 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, return 0; } -static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, - int stack_pages, int numptrs, int buf_size) +int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, + int stack_pages, int numptrs, int buf_size, int type) { + struct page_pool_params pp_params = { 0 }; struct npa_aq_enq_req *aq; struct otx2_pool *pool; int err; @@ -1324,6 +1396,22 @@ static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, aq->ctype = NPA_AQ_CTYPE_POOL; aq->op = NPA_AQ_INSTOP_INIT; + if (type != AURA_NIX_RQ) { + pool->page_pool = NULL; + return 0; + } + + pp_params.flags = PP_FLAG_PAGE_FRAG | PP_FLAG_DMA_MAP; + pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs); + pp_params.nid = NUMA_NO_NODE; + pp_params.dev = pfvf->dev; + pp_params.dma_dir = DMA_FROM_DEVICE; + pool->page_pool = page_pool_create(&pp_params); + if (IS_ERR(pool->page_pool)) { + netdev_err(pfvf->netdev, "Creation of page pool failed\n"); + return PTR_ERR(pool->page_pool); + } + return 0; } @@ -1349,7 +1437,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) stack_pages = (num_sqbs + hw->stack_pg_ptrs - 1) / hw->stack_pg_ptrs; - for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { + for (qidx = 0; qidx < hw->non_qos_queues; qidx++) { pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); /* Initialize aura context */ err = otx2_aura_init(pfvf, pool_id, pool_id, num_sqbs); @@ -1358,7 +1446,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) /* Initialize pool context */ err = otx2_pool_init(pfvf, pool_id, stack_pages, - num_sqbs, hw->sqb_size); + num_sqbs, hw->sqb_size, AURA_NIX_SQ); if (err) goto fail; } @@ -1369,7 +1457,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) goto fail; /* Allocate pointers and free them to aura/pool */ - for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { + for (qidx = 0; qidx < hw->non_qos_queues; qidx++) { pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); pool = &pfvf->qset.pool[pool_id]; @@ -1421,7 +1509,7 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) } for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) { err = otx2_pool_init(pfvf, pool_id, stack_pages, - num_ptrs, pfvf->rbsize); + num_ptrs, pfvf->rbsize, AURA_NIX_RQ); if (err) goto fail; } @@ -1605,7 +1693,6 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable) req->bpid_per_chan = 0; #endif - return otx2_sync_mbox_msg(&pfvf->mbox); } EXPORT_SYMBOL(otx2_nix_config_bp); @@ -1629,21 +1716,6 @@ void mbox_handler_cgx_fec_stats(struct otx2_nic *pfvf, pfvf->hw.cgx_fec_uncorr_blks += rsp->fec_uncorr_blks; } -void mbox_handler_nix_txsch_alloc(struct otx2_nic *pf, - struct nix_txsch_alloc_rsp *rsp) -{ - int lvl, schq; - - /* Setup transmit scheduler list */ - for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) - for (schq = 0; schq < rsp->schq[lvl]; schq++) - pf->hw.txschq_list[lvl][schq] = - rsp->schq_list[lvl][schq]; - - pf->hw.txschq_link_cfg_lvl = rsp->link_cfg_lvl; -} -EXPORT_SYMBOL(mbox_handler_nix_txsch_alloc); - void mbox_handler_npa_lf_alloc(struct otx2_nic *pfvf, struct npa_lf_alloc_rsp *rsp) { @@ -1727,6 +1799,17 @@ void otx2_set_cints_affinity(struct otx2_nic *pfvf) } } +static u32 get_dwrr_mtu(struct otx2_nic *pfvf, struct nix_hw_info *hw) +{ + if (is_otx2_lbkvf(pfvf->pdev)) { + pfvf->hw.smq_link_type = SMQ_LINK_TYPE_LBK; + return hw->lbk_dwrr_mtu; + } + + pfvf->hw.smq_link_type = SMQ_LINK_TYPE_RPM; + return hw->rpm_dwrr_mtu; +} + u16 otx2_get_max_mtu(struct otx2_nic *pfvf) { struct nix_hw_info *rsp; @@ -1756,7 +1839,7 @@ u16 otx2_get_max_mtu(struct otx2_nic *pfvf) max_mtu = rsp->max_mtu - 8 - OTX2_ETH_HLEN; /* Also save DWRR MTU, needed for DWRR weight calculation */ - pfvf->hw.dwrr_mtu = rsp->rpm_dwrr_mtu; + pfvf->hw.dwrr_mtu = get_dwrr_mtu(pfvf, rsp); if (!pfvf->hw.dwrr_mtu) pfvf->hw.dwrr_mtu = 1; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 0c8fc66ade82..0e81849db353 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -15,6 +15,7 @@ #include <linux/ptp_clock_kernel.h> #include <linux/timecounter.h> #include <linux/soc/marvell/octeontx2/asm.h> +#include <net/macsec.h> #include <net/pkt_cls.h> #include <net/devlink.h> #include <linux/time64.h> @@ -27,6 +28,7 @@ #include "otx2_txrx.h" #include "otx2_devlink.h" #include <rvu_trace.h> +#include "qos.h" /* IPv4 flag more fragment bit */ #define IPV4_FLAG_MORE 0x20 @@ -183,13 +185,29 @@ struct mbox { int up_num_msgs; /* mbox_up number of messages */ }; +/* Egress rate limiting definitions */ +#define MAX_BURST_EXPONENT 0x0FULL +#define MAX_BURST_MANTISSA 0xFFULL +#define MAX_BURST_SIZE 130816ULL +#define MAX_RATE_DIVIDER_EXPONENT 12ULL +#define MAX_RATE_EXPONENT 0x0FULL +#define MAX_RATE_MANTISSA 0xFFULL + +/* Bitfields in NIX_TLX_PIR register */ +#define TLX_RATE_MANTISSA GENMASK_ULL(8, 1) +#define TLX_RATE_EXPONENT GENMASK_ULL(12, 9) +#define TLX_RATE_DIVIDER_EXPONENT GENMASK_ULL(16, 13) +#define TLX_BURST_MANTISSA GENMASK_ULL(36, 29) +#define TLX_BURST_EXPONENT GENMASK_ULL(40, 37) + struct otx2_hw { struct pci_dev *pdev; struct otx2_rss_info rss_info; u16 rx_queues; u16 tx_queues; u16 xdp_queues; - u16 tot_tx_queues; + u16 tc_tx_queues; + u16 non_qos_queues; /* tx queues plus xdp queues */ u16 max_queues; u16 pool_cnt; u16 rqpool_cnt; @@ -209,6 +227,7 @@ struct otx2_hw { u16 txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; u16 matchall_ipolicer; u32 dwrr_mtu; + u8 smq_link_type; /* HW settings, coalescing etc */ u16 rx_chan_base; @@ -250,6 +269,7 @@ struct otx2_hw { #define CN10K_RPM 3 #define CN10K_PTP_ONESTEP 4 #define CN10K_HW_MACSEC 5 +#define QOS_CIR_PIR_SUPPORT 6 unsigned long cap_flag; #define LMT_LINE_SIZE 128 @@ -281,6 +301,7 @@ struct flr_work { struct refill_work { struct delayed_work pool_refill_work; struct otx2_nic *pf; + struct napi_struct *napi; }; /* PTPv2 originTimestamp structure */ @@ -353,7 +374,7 @@ struct dev_hw_ops { int (*sq_aq_init)(void *dev, u16 qidx, u16 sqb_aura); void (*sqe_flush)(void *dev, struct otx2_snd_queue *sq, int size, int qidx); - void (*refill_pool_ptrs)(void *dev, struct otx2_cq_queue *cq); + int (*refill_pool_ptrs)(void *dev, struct otx2_cq_queue *cq); void (*aura_freeptr)(void *dev, int aura, u64 buf); }; @@ -398,6 +419,9 @@ struct cn10k_mcs_txsc { u8 sa_bmap; u8 sa_key[CN10K_MCS_SA_PER_SC][MACSEC_MAX_KEY_LEN]; u8 encoding_sa; + u8 salt[CN10K_MCS_SA_PER_SC][MACSEC_SALT_LEN]; + ssci_t ssci[CN10K_MCS_SA_PER_SC]; + bool vlan_dev; /* macsec running on VLAN ? */ }; struct cn10k_mcs_rxsc { @@ -410,6 +434,8 @@ struct cn10k_mcs_rxsc { u16 hw_sa_id[CN10K_MCS_SA_PER_SC]; u8 sa_bmap; u8 sa_key[CN10K_MCS_SA_PER_SC][MACSEC_MAX_KEY_LEN]; + u8 salt[CN10K_MCS_SA_PER_SC][MACSEC_SALT_LEN]; + ssci_t ssci[CN10K_MCS_SA_PER_SC]; }; struct cn10k_mcs_cfg { @@ -501,6 +527,8 @@ struct otx2_nic { u16 pfc_schq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; bool pfc_alloc_status[NIX_PF_PFC_PRIO_MAX]; #endif + /* qos */ + struct otx2_qos qos; /* napi event count. It is needed for adaptive irq coalescing. */ u32 napi_events; @@ -582,6 +610,7 @@ static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf) __set_bit(CN10K_LMTST, &hw->cap_flag); __set_bit(CN10K_RPM, &hw->cap_flag); __set_bit(CN10K_PTP_ONESTEP, &hw->cap_flag); + __set_bit(QOS_CIR_PIR_SUPPORT, &hw->cap_flag); } if (is_dev_cn10kb(pfvf->pdev)) @@ -745,8 +774,7 @@ static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf) /* Alloc pointer from pool/aura */ static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura) { - u64 *ptr = (u64 *)otx2_get_regaddr(pfvf, - NPA_LF_AURA_OP_ALLOCX(0)); + u64 *ptr = (__force u64 *)otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_ALLOCX(0)); u64 incr = (u64)aura | BIT_ULL(63); return otx2_atomic64_add(incr, ptr); @@ -888,12 +916,34 @@ static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf, static inline u16 otx2_get_smq_idx(struct otx2_nic *pfvf, u16 qidx) { + u16 smq; #ifdef CONFIG_DCB if (qidx < NIX_PF_PFC_PRIO_MAX && pfvf->pfc_alloc_status[qidx]) return pfvf->pfc_schq_list[NIX_TXSCH_LVL_SMQ][qidx]; #endif + /* check if qidx falls under QOS queues */ + if (qidx >= pfvf->hw.non_qos_queues) + smq = pfvf->qos.qid_to_sqmap[qidx - pfvf->hw.non_qos_queues]; + else + smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; + + return smq; +} + +static inline u16 otx2_get_total_tx_queues(struct otx2_nic *pfvf) +{ + return pfvf->hw.non_qos_queues + pfvf->hw.tc_tx_queues; +} + +static inline u64 otx2_convert_rate(u64 rate) +{ + u64 converted_rate; + + /* Convert bytes per second to Mbps */ + converted_rate = rate * 8; + converted_rate = max_t(u64, converted_rate / 1000000, 1); - return pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; + return converted_rate; } /* MSI-X APIs */ @@ -920,19 +970,25 @@ int otx2_config_nix(struct otx2_nic *pfvf); int otx2_config_nix_queues(struct otx2_nic *pfvf); int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool pfc_en); int otx2_txsch_alloc(struct otx2_nic *pfvf); -int otx2_txschq_stop(struct otx2_nic *pfvf); +void otx2_txschq_stop(struct otx2_nic *pfvf); +void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq); void otx2_sqb_flush(struct otx2_nic *pfvf); -int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, - dma_addr_t *dma); +int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma); int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable); void otx2_ctx_disable(struct mbox *mbox, int type, bool npa); int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable); -void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); +void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int qidx); void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); +int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura); int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, dma_addr_t *dma); +int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, + int stack_pages, int numptrs, int buf_size, int type); +int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs); /* RSS configuration APIs*/ int otx2_rss_init(struct otx2_nic *pfvf); @@ -1000,6 +1056,8 @@ u16 otx2_get_max_mtu(struct otx2_nic *pfvf); int otx2_handle_ntuple_tc_features(struct net_device *netdev, netdev_features_t features); int otx2_smq_flush(struct otx2_nic *pfvf, int smq); +void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool, + u64 iova, int size); /* tc support */ int otx2_init_tc(struct otx2_nic *nic); @@ -1040,4 +1098,24 @@ static inline void cn10k_handle_mcs_event(struct otx2_nic *pfvf, {} #endif /* CONFIG_MACSEC */ +/* qos support */ +static inline void otx2_qos_init(struct otx2_nic *pfvf, int qos_txqs) +{ + struct otx2_hw *hw = &pfvf->hw; + + hw->tc_tx_queues = qos_txqs; + INIT_LIST_HEAD(&pfvf->qos.qos_tree); + mutex_init(&pfvf->qos.qos_lock); +} + +static inline void otx2_shutdown_qos(struct otx2_nic *pfvf) +{ + mutex_destroy(&pfvf->qos.qos_lock); +} + +u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, + struct net_device *sb_dev); +int otx2_get_txq_by_classid(struct otx2_nic *pfvf, u16 classid); +void otx2_qos_config_txschq(struct otx2_nic *pfvf); +void otx2_clean_qos_queues(struct otx2_nic *pfvf); #endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c index ccaf97bb1ce0..bfddbff7bcdf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c @@ -70,7 +70,7 @@ static int otx2_pfc_txschq_alloc_one(struct otx2_nic *pfvf, u8 prio) * link config level. These rest of the scheduler can be * same as hw.txschq_list. */ - for (lvl = 0; lvl < pfvf->hw.txschq_link_cfg_lvl; lvl++) + for (lvl = 0; lvl <= pfvf->hw.txschq_link_cfg_lvl; lvl++) req->schq[lvl] = 1; rc = otx2_sync_mbox_msg(&pfvf->mbox); @@ -83,7 +83,7 @@ static int otx2_pfc_txschq_alloc_one(struct otx2_nic *pfvf, u8 prio) return PTR_ERR(rsp); /* Setup transmit scheduler list */ - for (lvl = 0; lvl < pfvf->hw.txschq_link_cfg_lvl; lvl++) { + for (lvl = 0; lvl <= pfvf->hw.txschq_link_cfg_lvl; lvl++) { if (!rsp->schq[lvl]) return -ENOSPC; @@ -125,19 +125,12 @@ int otx2_pfc_txschq_alloc(struct otx2_nic *pfvf) static int otx2_pfc_txschq_stop_one(struct otx2_nic *pfvf, u8 prio) { - struct nix_txsch_free_req *free_req; + int lvl; - mutex_lock(&pfvf->mbox.lock); /* free PFC TLx nodes */ - free_req = otx2_mbox_alloc_msg_nix_txsch_free(&pfvf->mbox); - if (!free_req) { - mutex_unlock(&pfvf->mbox.lock); - return -ENOMEM; - } - - free_req->flags = TXSCHQ_FREE_ALL; - otx2_sync_mbox_msg(&pfvf->mbox); - mutex_unlock(&pfvf->mbox.lock); + for (lvl = 0; lvl <= pfvf->hw.txschq_link_cfg_lvl; lvl++) + otx2_txschq_free_one(pfvf, lvl, + pfvf->pfc_schq_list[lvl][prio]); pfvf->pfc_alloc_status[prio] = false; return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 0f8d1a69139f..c47d91da32dc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -92,10 +92,16 @@ static void otx2_get_qset_strings(struct otx2_nic *pfvf, u8 **data, int qset) *data += ETH_GSTRING_LEN; } } - for (qidx = 0; qidx < pfvf->hw.tx_queues; qidx++) { + + for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { for (stats = 0; stats < otx2_n_queue_stats; stats++) { - sprintf(*data, "txq%d: %s", qidx + start_qidx, - otx2_queue_stats[stats].name); + if (qidx >= pfvf->hw.non_qos_queues) + sprintf(*data, "txq_qos%d: %s", + qidx + start_qidx - pfvf->hw.non_qos_queues, + otx2_queue_stats[stats].name); + else + sprintf(*data, "txq%d: %s", qidx + start_qidx, + otx2_queue_stats[stats].name); *data += ETH_GSTRING_LEN; } } @@ -159,7 +165,7 @@ static void otx2_get_qset_stats(struct otx2_nic *pfvf, [otx2_queue_stats[stat].index]; } - for (qidx = 0; qidx < pfvf->hw.tx_queues; qidx++) { + for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { if (!otx2_update_sq_stats(pfvf, qidx)) { for (stat = 0; stat < otx2_n_queue_stats; stat++) *((*data)++) = 0; @@ -254,7 +260,7 @@ static int otx2_get_sset_count(struct net_device *netdev, int sset) return -EINVAL; qstats_count = otx2_n_queue_stats * - (pfvf->hw.rx_queues + pfvf->hw.tx_queues); + (pfvf->hw.rx_queues + otx2_get_total_tx_queues(pfvf)); if (!test_bit(CN10K_RPM, &pfvf->hw.cap_flag)) mac_stats = CGX_RX_STATS_COUNT + CGX_TX_STATS_COUNT; otx2_update_lmac_fec_stats(pfvf); @@ -282,7 +288,7 @@ static int otx2_set_channels(struct net_device *dev, { struct otx2_nic *pfvf = netdev_priv(dev); bool if_up = netif_running(dev); - int err = 0; + int err, qos_txqs; if (!channel->rx_count || !channel->tx_count) return -EINVAL; @@ -296,14 +302,19 @@ static int otx2_set_channels(struct net_device *dev, if (if_up) dev->netdev_ops->ndo_stop(dev); - err = otx2_set_real_num_queues(dev, channel->tx_count, + qos_txqs = bitmap_weight(pfvf->qos.qos_sq_bmap, + OTX2_QOS_MAX_LEAF_NODES); + + err = otx2_set_real_num_queues(dev, channel->tx_count + qos_txqs, channel->rx_count); if (err) return err; pfvf->hw.rx_queues = channel->rx_count; pfvf->hw.tx_queues = channel->tx_count; - pfvf->qset.cq_cnt = pfvf->hw.tx_queues + pfvf->hw.rx_queues; + if (pfvf->xdp_prog) + pfvf->hw.xdp_queues = channel->rx_count; + pfvf->hw.non_qos_queues = pfvf->hw.tx_queues + pfvf->hw.xdp_queues; if (if_up) err = dev->netdev_ops->ndo_open(dev); @@ -1405,7 +1416,7 @@ static int otx2vf_get_sset_count(struct net_device *netdev, int sset) return -EINVAL; qstats_count = otx2_n_queue_stats * - (vf->hw.rx_queues + vf->hw.tx_queues); + (vf->hw.rx_queues + otx2_get_total_tx_queues(vf)); return otx2_n_dev_stats + otx2_n_drv_stats + qstats_count + 1; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 10e11262d48a..2d7713a1a153 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -872,6 +872,14 @@ static int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, return -EINVAL; vlan_etype = be16_to_cpu(fsp->h_ext.vlan_etype); + + /* Drop rule with vlan_etype == 802.1Q + * and vlan_id == 0 is not supported + */ + if (vlan_etype == ETH_P_8021Q && !fsp->m_ext.vlan_tci && + fsp->ring_cookie == RX_CLS_FLOW_DISC) + return -EINVAL; + /* Only ETH_P_8021Q and ETH_P_802AD types supported */ if (vlan_etype != ETH_P_8021Q && vlan_etype != ETH_P_8021AD) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 18284ad75157..9ded98bb1c89 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -23,6 +23,7 @@ #include "otx2_struct.h" #include "otx2_ptp.h" #include "cn10k.h" +#include "qos.h" #include <rvu_trace.h> #define DRV_NAME "rvu_nicpf" @@ -271,8 +272,7 @@ static int otx2_pf_flr_init(struct otx2_nic *pf, int num_vfs) { int vf; - pf->flr_wq = alloc_workqueue("otx2_pf_flr_wq", - WQ_UNBOUND | WQ_HIGHPRI, 1); + pf->flr_wq = alloc_ordered_workqueue("otx2_pf_flr_wq", WQ_HIGHPRI); if (!pf->flr_wq) return -ENOMEM; @@ -593,9 +593,8 @@ static int otx2_pfvf_mbox_init(struct otx2_nic *pf, int numvfs) if (!pf->mbox_pfvf) return -ENOMEM; - pf->mbox_pfvf_wq = alloc_workqueue("otx2_pfvf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + pf->mbox_pfvf_wq = alloc_ordered_workqueue("otx2_pfvf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!pf->mbox_pfvf_wq) return -ENOMEM; @@ -791,10 +790,6 @@ static void otx2_process_pfaf_mbox_msg(struct otx2_nic *pf, case MBOX_MSG_NIX_LF_ALLOC: mbox_handler_nix_lf_alloc(pf, (struct nix_lf_alloc_rsp *)msg); break; - case MBOX_MSG_NIX_TXSCH_ALLOC: - mbox_handler_nix_txsch_alloc(pf, - (struct nix_txsch_alloc_rsp *)msg); - break; case MBOX_MSG_NIX_BP_ENABLE: mbox_handler_nix_bp_enable(pf, (struct nix_bp_cfg_rsp *)msg); break; @@ -1063,9 +1058,8 @@ static int otx2_pfaf_mbox_init(struct otx2_nic *pf) int err; mbox->pfvf = pf; - pf->mbox_wq = alloc_workqueue("otx2_pfaf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + pf->mbox_wq = alloc_ordered_workqueue("otx2_pfaf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!pf->mbox_wq) return -ENOMEM; @@ -1228,6 +1222,7 @@ static char *nix_snd_status_e_str[NIX_SND_STATUS_MAX] = { static irqreturn_t otx2_q_intr_handler(int irq, void *data) { struct otx2_nic *pf = data; + struct otx2_snd_queue *sq; u64 val, *ptr; u64 qidx = 0; @@ -1257,10 +1252,14 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) } /* SQ */ - for (qidx = 0; qidx < pf->hw.tot_tx_queues; qidx++) { + for (qidx = 0; qidx < otx2_get_total_tx_queues(pf); qidx++) { u64 sq_op_err_dbg, mnq_err_dbg, snd_err_dbg; u8 sq_op_err_code, mnq_err_code, snd_err_code; + sq = &pf->qset.sq[qidx]; + if (!sq->sqb_ptrs) + continue; + /* Below debug registers captures first errors corresponding to * those registers. We don't have to check against SQ qid as * these are fatal errors. @@ -1383,8 +1382,11 @@ static void otx2_free_sq_res(struct otx2_nic *pf) otx2_ctx_disable(&pf->mbox, NIX_AQ_CTYPE_SQ, false); /* Free SQB pointers */ otx2_sq_free_sqbs(pf); - for (qidx = 0; qidx < pf->hw.tot_tx_queues; qidx++) { + for (qidx = 0; qidx < otx2_get_total_tx_queues(pf); qidx++) { sq = &qset->sq[qidx]; + /* Skip freeing Qos queues if they are not initialized */ + if (!sq->sqe) + continue; qmem_free(pf->dev, sq->sqe); qmem_free(pf->dev, sq->tso_hdrs); kfree(sq->sg); @@ -1433,7 +1435,7 @@ static int otx2_init_hw_resources(struct otx2_nic *pf) * so, aura count = pool count. */ hw->rqpool_cnt = hw->rx_queues; - hw->sqpool_cnt = hw->tot_tx_queues; + hw->sqpool_cnt = otx2_get_total_tx_queues(pf); hw->pool_cnt = hw->rqpool_cnt + hw->sqpool_cnt; /* Maximum hardware supported transmit length */ @@ -1452,8 +1454,9 @@ static int otx2_init_hw_resources(struct otx2_nic *pf) if (err) goto err_free_npa_lf; - /* Enable backpressure */ - otx2_nix_config_bp(pf, true); + /* Enable backpressure for CGX mapped PF/VFs */ + if (!is_otx2_lbkvf(pf->pdev)) + otx2_nix_config_bp(pf, true); /* Init Auras and pools used by NIX RQ, for free buffer ptrs */ err = otx2_rq_aura_pool_init(pf); @@ -1516,8 +1519,7 @@ err_free_nix_queues: otx2_free_cq_res(pf); otx2_ctx_disable(mbox, NIX_AQ_CTYPE_RQ, false); err_free_txsch: - if (otx2_txschq_stop(pf)) - dev_err(pf->dev, "%s failed to stop TX schedulers\n", __func__); + otx2_txschq_stop(pf); err_free_sq_ptrs: otx2_sq_free_sqbs(pf); err_free_rq_ptrs: @@ -1551,22 +1553,24 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) struct nix_lf_free_req *free_req; struct mbox *mbox = &pf->mbox; struct otx2_cq_queue *cq; + struct otx2_pool *pool; struct msg_req *req; - int qidx, err; + int pool_id; + int qidx; /* Ensure all SQE are processed */ otx2_sqb_flush(pf); /* Stop transmission */ - err = otx2_txschq_stop(pf); - if (err) - dev_err(pf->dev, "RVUPF: Failed to stop/free TX schedulers\n"); + otx2_txschq_stop(pf); #ifdef CONFIG_DCB if (pf->pfc_en) otx2_pfc_txschq_stop(pf); #endif + otx2_clean_qos_queues(pf); + mutex_lock(&mbox->lock); /* Disable backpressure */ if (!(pf->pcifunc & RVU_PFVF_FUNC_MASK)) @@ -1580,7 +1584,7 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) for (qidx = 0; qidx < qset->cq_cnt; qidx++) { cq = &qset->cq[qidx]; if (cq->cq_type == CQ_RX) - otx2_cleanup_rx_cqes(pf, cq); + otx2_cleanup_rx_cqes(pf, cq, qidx); else otx2_cleanup_tx_cqes(pf, cq); } @@ -1590,6 +1594,13 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) /* Free RQ buffer pointers*/ otx2_free_aura_ptr(pf, AURA_NIX_RQ); + for (qidx = 0; qidx < pf->hw.rx_queues; qidx++) { + pool_id = otx2_get_pool_idx(pf, AURA_NIX_RQ, qidx); + pool = &pf->qset.pool[pool_id]; + page_pool_destroy(pool->page_pool); + pool->page_pool = NULL; + } + otx2_free_cq_res(pf); /* Free all ingress bandwidth profiles allocated */ @@ -1688,11 +1699,14 @@ int otx2_open(struct net_device *netdev) netif_carrier_off(netdev); - pf->qset.cq_cnt = pf->hw.rx_queues + pf->hw.tot_tx_queues; /* RQ and SQs are mapped to different CQs, * so find out max CQ IRQs (i.e CINTs) needed. */ - pf->hw.cint_cnt = max(pf->hw.rx_queues, pf->hw.tx_queues); + pf->hw.cint_cnt = max3(pf->hw.rx_queues, pf->hw.tx_queues, + pf->hw.tc_tx_queues); + + pf->qset.cq_cnt = pf->hw.rx_queues + otx2_get_total_tx_queues(pf); + qset->napi = kcalloc(pf->hw.cint_cnt, sizeof(*cq_poll), GFP_KERNEL); if (!qset->napi) return -ENOMEM; @@ -1708,7 +1722,7 @@ int otx2_open(struct net_device *netdev) if (!qset->cq) goto err_free_mem; - qset->sq = kcalloc(pf->hw.tot_tx_queues, + qset->sq = kcalloc(otx2_get_total_tx_queues(pf), sizeof(struct otx2_snd_queue), GFP_KERNEL); if (!qset->sq) goto err_free_mem; @@ -1743,6 +1757,11 @@ int otx2_open(struct net_device *netdev) else cq_poll->cq_ids[CQ_XDP] = CINT_INVALID_CQ; + cq_poll->cq_ids[CQ_QOS] = (qidx < pf->hw.tc_tx_queues) ? + (qidx + pf->hw.rx_queues + + pf->hw.non_qos_queues) : + CINT_INVALID_CQ; + cq_poll->dev = (void *)pf; cq_poll->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; INIT_WORK(&cq_poll->dim.work, otx2_dim_work); @@ -1826,6 +1845,9 @@ int otx2_open(struct net_device *netdev) /* 'intf_down' may be checked on any cpu */ smp_wmb(); + /* Enable QoS configuration before starting tx queues */ + otx2_qos_config_txschq(pf); + /* we have already received link status notification */ if (pf->linfo.link_up && !(pf->pcifunc & RVU_PFVF_FUNC_MASK)) otx2_handle_link_event(pf); @@ -1920,6 +1942,10 @@ int otx2_stop(struct net_device *netdev) netif_tx_disable(netdev); + for (wrk = 0; wrk < pf->qset.cq_cnt; wrk++) + cancel_delayed_work_sync(&pf->refill_wrk[wrk].pool_refill_work); + devm_kfree(pf->dev, pf->refill_wrk); + otx2_free_hw_resources(pf); otx2_free_cints(pf, pf->hw.cint_cnt); otx2_disable_napi(pf); @@ -1927,9 +1953,6 @@ int otx2_stop(struct net_device *netdev) for (qidx = 0; qidx < netdev->num_tx_queues; qidx++) netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx)); - for (wrk = 0; wrk < pf->qset.cq_cnt; wrk++) - cancel_delayed_work_sync(&pf->refill_wrk[wrk].pool_refill_work); - devm_kfree(pf->dev, pf->refill_wrk); kfree(qset->sq); kfree(qset->cq); @@ -1947,6 +1970,12 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) int qidx = skb_get_queue_mapping(skb); struct otx2_snd_queue *sq; struct netdev_queue *txq; + int sq_idx; + + /* XDP SQs are not mapped with TXQs + * advance qid to derive correct sq mapped with QOS + */ + sq_idx = (qidx >= pf->hw.tx_queues) ? (qidx + pf->hw.xdp_queues) : qidx; /* Check for minimum and maximum packet length */ if (skb->len <= ETH_HLEN || @@ -1955,7 +1984,7 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } - sq = &pf->qset.sq[qidx]; + sq = &pf->qset.sq[sq_idx]; txq = netdev_get_tx_queue(netdev, qidx); if (!otx2_sq_append_skb(netdev, sq, skb, qidx)) { @@ -1973,14 +2002,48 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } -static u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, - struct net_device *sb_dev) +static int otx2_qos_select_htb_queue(struct otx2_nic *pf, struct sk_buff *skb, + u16 htb_maj_id) +{ + u16 classid; + + if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id) + classid = TC_H_MIN(skb->priority); + else + classid = READ_ONCE(pf->qos.defcls); + + if (!classid) + return 0; + + return otx2_get_txq_by_classid(pf, classid); +} + +u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, + struct net_device *sb_dev) { -#ifdef CONFIG_DCB struct otx2_nic *pf = netdev_priv(netdev); + bool qos_enabled; +#ifdef CONFIG_DCB u8 vlan_prio; #endif + int txq; + + qos_enabled = (netdev->real_num_tx_queues > pf->hw.tx_queues) ? true : false; + if (unlikely(qos_enabled)) { + /* This smp_load_acquire() pairs with smp_store_release() in + * otx2_qos_root_add() called from htb offload root creation + */ + u16 htb_maj_id = smp_load_acquire(&pf->qos.maj_id); + if (unlikely(htb_maj_id)) { + txq = otx2_qos_select_htb_queue(pf, skb, htb_maj_id); + if (txq > 0) + return txq; + goto process_pfc; + } + } + +process_pfc: #ifdef CONFIG_DCB if (!skb_vlan_tag_present(skb)) goto pick_tx; @@ -1994,8 +2057,13 @@ static u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, pick_tx: #endif - return netdev_pick_tx(netdev, skb, NULL); + txq = netdev_pick_tx(netdev, skb, NULL); + if (unlikely(qos_enabled)) + return txq % pf->hw.tx_queues; + + return txq; } +EXPORT_SYMBOL(otx2_select_queue); static netdev_features_t otx2_fix_features(struct net_device *dev, netdev_features_t features) @@ -2529,7 +2597,7 @@ static int otx2_xdp_setup(struct otx2_nic *pf, struct bpf_prog *prog) xdp_features_clear_redirect_target(dev); } - pf->hw.tot_tx_queues += pf->hw.xdp_queues; + pf->hw.non_qos_queues += pf->hw.xdp_queues; if (if_up) otx2_open(pf->netdev); @@ -2712,10 +2780,10 @@ static void otx2_sriov_vfcfg_cleanup(struct otx2_nic *pf) static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct device *dev = &pdev->dev; + int err, qcount, qos_txqs; struct net_device *netdev; struct otx2_nic *pf; struct otx2_hw *hw; - int err, qcount; int num_vec; err = pcim_enable_device(pdev); @@ -2740,8 +2808,9 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Set number of queues */ qcount = min_t(int, num_online_cpus(), OTX2_MAX_CQ_CNT); + qos_txqs = min_t(int, qcount, OTX2_QOS_MAX_LEAF_NODES); - netdev = alloc_etherdev_mqs(sizeof(*pf), qcount, qcount); + netdev = alloc_etherdev_mqs(sizeof(*pf), qcount + qos_txqs, qcount); if (!netdev) { err = -ENOMEM; goto err_release_regions; @@ -2760,7 +2829,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) hw->pdev = pdev; hw->rx_queues = qcount; hw->tx_queues = qcount; - hw->tot_tx_queues = qcount; + hw->non_qos_queues = qcount; hw->max_queues = qcount; hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; /* Use CQE of 128 byte descriptor size by default */ @@ -2929,6 +2998,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_pf_sriov_init; #endif + otx2_qos_init(pf, qos_txqs); + return 0; err_pf_sriov_init: @@ -3104,6 +3175,7 @@ static void otx2_remove(struct pci_dev *pdev) otx2_ptp_destroy(pf); otx2_mcam_flow_del(pf); otx2_shutdown_tc(pf); + otx2_shutdown_qos(pf); otx2_detach_resources(&pf->mbox); if (pf->hw.lmt_info) free_percpu(pf->hw.lmt_info); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h index 1b967eaf948b..45a32e4b49d1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h @@ -145,12 +145,25 @@ #define NIX_AF_TL1X_TOPOLOGY(a) (0xC80 | (a) << 16) #define NIX_AF_TL2X_PARENT(a) (0xE88 | (a) << 16) #define NIX_AF_TL2X_SCHEDULE(a) (0xE00 | (a) << 16) +#define NIX_AF_TL2X_TOPOLOGY(a) (0xE80 | (a) << 16) +#define NIX_AF_TL2X_CIR(a) (0xE20 | (a) << 16) +#define NIX_AF_TL2X_PIR(a) (0xE30 | (a) << 16) #define NIX_AF_TL3X_PARENT(a) (0x1088 | (a) << 16) #define NIX_AF_TL3X_SCHEDULE(a) (0x1000 | (a) << 16) +#define NIX_AF_TL3X_SHAPE(a) (0x1010 | (a) << 16) +#define NIX_AF_TL3X_CIR(a) (0x1020 | (a) << 16) +#define NIX_AF_TL3X_PIR(a) (0x1030 | (a) << 16) +#define NIX_AF_TL3X_TOPOLOGY(a) (0x1080 | (a) << 16) #define NIX_AF_TL4X_PARENT(a) (0x1288 | (a) << 16) #define NIX_AF_TL4X_SCHEDULE(a) (0x1200 | (a) << 16) +#define NIX_AF_TL4X_SHAPE(a) (0x1210 | (a) << 16) +#define NIX_AF_TL4X_CIR(a) (0x1220 | (a) << 16) #define NIX_AF_TL4X_PIR(a) (0x1230 | (a) << 16) +#define NIX_AF_TL4X_TOPOLOGY(a) (0x1280 | (a) << 16) #define NIX_AF_MDQX_SCHEDULE(a) (0x1400 | (a) << 16) +#define NIX_AF_MDQX_SHAPE(a) (0x1410 | (a) << 16) +#define NIX_AF_MDQX_CIR(a) (0x1420 | (a) << 16) +#define NIX_AF_MDQX_PIR(a) (0x1430 | (a) << 16) #define NIX_AF_MDQX_PARENT(a) (0x1480 | (a) << 16) #define NIX_AF_TL3_TL2X_LINKX_CFG(a, b) (0x1700 | (a) << 16 | (b) << 3) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 8392f63e433f..5e56b6c3e60a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -19,25 +19,11 @@ #include "cn10k.h" #include "otx2_common.h" - -/* Egress rate limiting definitions */ -#define MAX_BURST_EXPONENT 0x0FULL -#define MAX_BURST_MANTISSA 0xFFULL -#define MAX_BURST_SIZE 130816ULL -#define MAX_RATE_DIVIDER_EXPONENT 12ULL -#define MAX_RATE_EXPONENT 0x0FULL -#define MAX_RATE_MANTISSA 0xFFULL +#include "qos.h" #define CN10K_MAX_BURST_MANTISSA 0x7FFFULL #define CN10K_MAX_BURST_SIZE 8453888ULL -/* Bitfields in NIX_TLX_PIR register */ -#define TLX_RATE_MANTISSA GENMASK_ULL(8, 1) -#define TLX_RATE_EXPONENT GENMASK_ULL(12, 9) -#define TLX_RATE_DIVIDER_EXPONENT GENMASK_ULL(16, 13) -#define TLX_BURST_MANTISSA GENMASK_ULL(36, 29) -#define TLX_BURST_EXPONENT GENMASK_ULL(40, 37) - #define CN10K_TLX_BURST_MANTISSA GENMASK_ULL(43, 29) #define CN10K_TLX_BURST_EXPONENT GENMASK_ULL(47, 44) @@ -147,8 +133,8 @@ static void otx2_get_egress_rate_cfg(u64 maxrate, u32 *exp, } } -static u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic, - u64 maxrate, u32 burst) +u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic, + u64 maxrate, u32 burst) { u32 burst_exp, burst_mantissa; u32 exp, mantissa, div_exp; @@ -264,7 +250,6 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, struct netlink_ext_ack *extack = cls->common.extack; struct flow_action *actions = &cls->rule->action; struct flow_action_entry *entry; - u64 rate; int err; err = otx2_tc_validate_flow(nic, actions, extack); @@ -288,10 +273,8 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second"); return -EOPNOTSUPP; } - /* Convert bytes per second to Mbps */ - rate = entry->police.rate_bytes_ps * 8; - rate = max_t(u64, rate / 1000000, 1); - err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate); + err = otx2_set_matchall_egress_rate(nic, entry->police.burst, + otx2_convert_rate(entry->police.rate_bytes_ps)); if (err) return err; nic->flags |= OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED; @@ -413,8 +396,12 @@ static int otx2_tc_parse_actions(struct otx2_nic *nic, return -EOPNOTSUPP; } req->vf = priv->pcifunc & RVU_PFVF_FUNC_MASK; - req->op = NIX_RX_ACTION_DEFAULT; - return 0; + + /* if op is already set; avoid overwriting the same */ + if (!req->op) + req->op = NIX_RX_ACTION_DEFAULT; + break; + case FLOW_ACTION_VLAN_POP: req->vtag0_valid = true; /* use RX_VTAG_TYPE7 which is initialized to strip vlan tag */ @@ -450,6 +437,12 @@ static int otx2_tc_parse_actions(struct otx2_nic *nic, case FLOW_ACTION_MARK: mark = act->mark; break; + + case FLOW_ACTION_RX_QUEUE_MAPPING: + req->op = NIX_RX_ACTIONOP_UCAST; + req->index = act->rx_queue; + break; + default: return -EOPNOTSUPP; } @@ -604,6 +597,21 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, return -EOPNOTSUPP; } + if (!match.mask->vlan_id) { + struct flow_action_entry *act; + int i; + + flow_action_for_each(i, act, &rule->action) { + if (act->id == FLOW_ACTION_DROP) { + netdev_err(nic->netdev, + "vlan tpid 0x%x with vlan_id %d is not supported for DROP rule.\n", + ntohs(match.key->vlan_tpid), + match.key->vlan_id); + return -EOPNOTSUPP; + } + } + } + if (match.mask->vlan_id || match.mask->vlan_dei || match.mask->vlan_priority) { @@ -1127,6 +1135,8 @@ int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type, switch (type) { case TC_SETUP_BLOCK: return otx2_setup_tc_block(netdev, type_data); + case TC_SETUP_QDISC_HTB: + return otx2_setup_tc_htb(netdev, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 7af223b0a37f..e77d43848955 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -217,9 +217,6 @@ static bool otx2_skb_add_frag(struct otx2_nic *pfvf, struct sk_buff *skb, skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, va - page_address(page) + off, len - off, pfvf->rbsize); - - otx2_dma_unmap_page(pfvf, iova - OTX2_HEAD_ROOM, - pfvf->rbsize, DMA_FROM_DEVICE); return true; } @@ -382,6 +379,8 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, if (pfvf->netdev->features & NETIF_F_RXCSUM) skb->ip_summed = CHECKSUM_UNNECESSARY; + skb_mark_for_recycle(skb); + napi_gro_frags(napi); } @@ -425,9 +424,10 @@ process_cqe: return processed_cqe; } -void otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) +int otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) { struct otx2_nic *pfvf = dev; + int cnt = cq->pool_ptrs; dma_addr_t bufptr; while (cq->pool_ptrs) { @@ -436,6 +436,8 @@ void otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) otx2_aura_freeptr(pfvf, cq->cq_idx, bufptr + OTX2_HEAD_ROOM); cq->pool_ptrs--; } + + return cnt - cq->pool_ptrs; } static int otx2_tx_napi_handler(struct otx2_nic *pfvf, @@ -464,12 +466,13 @@ process_cqe: break; } - if (cq->cq_type == CQ_XDP) { + qidx = cq->cq_idx - pfvf->hw.rx_queues; + + if (cq->cq_type == CQ_XDP) otx2_xdp_snd_pkt_handler(pfvf, sq, cqe); - } else { - otx2_snd_pkt_handler(pfvf, cq, sq, cqe, budget, - &tx_pkts, &tx_bytes); - } + else + otx2_snd_pkt_handler(pfvf, cq, &pfvf->qset.sq[qidx], + cqe, budget, &tx_pkts, &tx_bytes); cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID; processed_cqe++; @@ -486,7 +489,11 @@ process_cqe: if (likely(tx_pkts)) { struct netdev_queue *txq; - txq = netdev_get_tx_queue(pfvf->netdev, cq->cint_idx); + qidx = cq->cq_idx - pfvf->hw.rx_queues; + + if (qidx >= pfvf->hw.tx_queues) + qidx -= pfvf->hw.xdp_queues; + txq = netdev_get_tx_queue(pfvf->netdev, qidx); netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); /* Check if queue was stopped earlier due to ring full */ smp_mb(); @@ -517,6 +524,7 @@ int otx2_napi_handler(struct napi_struct *napi, int budget) struct otx2_cq_queue *cq; struct otx2_qset *qset; struct otx2_nic *pfvf; + int filled_cnt = -1; cq_poll = container_of(napi, struct otx2_cq_poll, napi); pfvf = (struct otx2_nic *)cq_poll->dev; @@ -537,7 +545,7 @@ int otx2_napi_handler(struct napi_struct *napi, int budget) } if (rx_cq && rx_cq->pool_ptrs) - pfvf->hw_ops->refill_pool_ptrs(pfvf, rx_cq); + filled_cnt = pfvf->hw_ops->refill_pool_ptrs(pfvf, rx_cq); /* Clear the IRQ */ otx2_write64(pfvf, NIX_LF_CINTX_INT(cq_poll->cint_idx), BIT_ULL(0)); @@ -557,9 +565,25 @@ int otx2_napi_handler(struct napi_struct *napi, int budget) otx2_config_irq_coalescing(pfvf, i); } - /* Re-enable interrupts */ - otx2_write64(pfvf, NIX_LF_CINTX_ENA_W1S(cq_poll->cint_idx), - BIT_ULL(0)); + if (unlikely(!filled_cnt)) { + struct refill_work *work; + struct delayed_work *dwork; + + work = &pfvf->refill_wrk[cq->cq_idx]; + dwork = &work->pool_refill_work; + /* Schedule a task if no other task is running */ + if (!cq->refill_task_sched) { + work->napi = napi; + cq->refill_task_sched = true; + schedule_delayed_work(dwork, + msecs_to_jiffies(100)); + } + } else { + /* Re-enable interrupts */ + otx2_write64(pfvf, + NIX_LF_CINTX_ENA_W1S(cq_poll->cint_idx), + BIT_ULL(0)); + } } return workdone; } @@ -734,7 +758,8 @@ static void otx2_sqe_add_hdr(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, sqe_hdr->aura = sq->aura_id; /* Post a CQE Tx after pkt transmission */ sqe_hdr->pnc = 1; - sqe_hdr->sq = qidx; + sqe_hdr->sq = (qidx >= pfvf->hw.tx_queues) ? + qidx + pfvf->hw.xdp_queues : qidx; } sqe_hdr->total = skb->len; /* Set SQE identifier which will be used later for freeing SKB */ @@ -1178,11 +1203,13 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, } EXPORT_SYMBOL(otx2_sq_append_skb); -void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) +void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int qidx) { struct nix_cqe_rx_s *cqe; + struct otx2_pool *pool; int processed_cqe = 0; - u64 iova, pa; + u16 pool_id; + u64 iova; if (pfvf->xdp_prog) xdp_rxq_info_unreg(&cq->xdp_rxq); @@ -1190,6 +1217,9 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) return; + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, qidx); + pool = &pfvf->qset.pool[pool_id]; + while (cq->pend_cqe) { cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq); processed_cqe++; @@ -1202,9 +1232,8 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) continue; } iova = cqe->sg.seg_addr - OTX2_HEAD_ROOM; - pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); - otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize, DMA_FROM_DEVICE); - put_page(virt_to_page(phys_to_virt(pa))); + + otx2_free_bufs(pfvf, pool, iova, pfvf->rbsize); } /* Free CQEs to HW */ @@ -1219,8 +1248,10 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) struct nix_cqe_tx_s *cqe; int processed_cqe = 0; struct sg_list *sg; + int qidx; - sq = &pfvf->qset.sq[cq->cint_idx]; + qidx = cq->cq_idx - pfvf->hw.rx_queues; + sq = &pfvf->qset.sq[qidx]; if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) return; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index 93cac2c2664c..a82ffca8ce1b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -23,6 +23,8 @@ #define OTX2_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN) #define OTX2_MIN_MTU 60 +#define OTX2_PAGE_POOL_SZ 2048 + #define OTX2_MAX_GSO_SEGS 255 #define OTX2_MAX_FRAGS_IN_SQE 9 @@ -102,7 +104,8 @@ enum cq_type { CQ_RX, CQ_TX, CQ_XDP, - CQS_PER_CINT = 3, /* RQ + SQ + XDP */ + CQ_QOS, + CQS_PER_CINT = 4, /* RQ + SQ + XDP + QOS_SQ */ }; struct otx2_cq_poll { @@ -117,6 +120,7 @@ struct otx2_cq_poll { struct otx2_pool { struct qmem *stack; struct qmem *fc_addr; + struct page_pool *page_pool; u16 rbsize; }; @@ -166,6 +170,6 @@ void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx); void otx2_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx); -void otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); -void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); +int otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); +int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); #endif /* OTX2_TXRX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 53366dbfbf27..35e06048356f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -70,10 +70,6 @@ static void otx2vf_process_vfaf_mbox_msg(struct otx2_nic *vf, case MBOX_MSG_NIX_LF_ALLOC: mbox_handler_nix_lf_alloc(vf, (struct nix_lf_alloc_rsp *)msg); break; - case MBOX_MSG_NIX_TXSCH_ALLOC: - mbox_handler_nix_txsch_alloc(vf, - (struct nix_txsch_alloc_rsp *)msg); - break; case MBOX_MSG_NIX_BP_ENABLE: mbox_handler_nix_bp_enable(vf, (struct nix_bp_cfg_rsp *)msg); break; @@ -297,9 +293,8 @@ static int otx2vf_vfaf_mbox_init(struct otx2_nic *vf) int err; mbox->pfvf = vf; - vf->mbox_wq = alloc_workqueue("otx2_vfaf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + vf->mbox_wq = alloc_ordered_workqueue("otx2_vfaf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!vf->mbox_wq) return -ENOMEM; @@ -479,6 +474,7 @@ static const struct net_device_ops otx2vf_netdev_ops = { .ndo_open = otx2vf_open, .ndo_stop = otx2vf_stop, .ndo_start_xmit = otx2vf_xmit, + .ndo_select_queue = otx2_select_queue, .ndo_set_rx_mode = otx2vf_set_rx_mode, .ndo_set_mac_address = otx2_set_mac_address, .ndo_change_mtu = otx2vf_change_mtu, @@ -524,10 +520,10 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int num_vec = pci_msix_vec_count(pdev); struct device *dev = &pdev->dev; + int err, qcount, qos_txqs; struct net_device *netdev; struct otx2_nic *vf; struct otx2_hw *hw; - int err, qcount; err = pcim_enable_device(pdev); if (err) { @@ -550,7 +546,8 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); qcount = num_online_cpus(); - netdev = alloc_etherdev_mqs(sizeof(*vf), qcount, qcount); + qos_txqs = min_t(int, qcount, OTX2_QOS_MAX_LEAF_NODES); + netdev = alloc_etherdev_mqs(sizeof(*vf), qcount + qos_txqs, qcount); if (!netdev) { err = -ENOMEM; goto err_release_regions; @@ -570,7 +567,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) hw->rx_queues = qcount; hw->tx_queues = qcount; hw->max_queues = qcount; - hw->tot_tx_queues = qcount; + hw->non_qos_queues = qcount; hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; /* Use CQE of 128 byte descriptor size by default */ hw->xqe_size = 128; @@ -699,6 +696,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_shutdown_tc; #endif + otx2_qos_init(vf, qos_txqs); return 0; @@ -761,6 +759,7 @@ static void otx2vf_remove(struct pci_dev *pdev) otx2_ptp_destroy(vf); otx2_mcam_flow_del(vf); otx2_shutdown_tc(vf); + otx2_shutdown_qos(vf); otx2vf_disable_mbox_intr(vf); otx2_detach_resources(&vf->mbox); free_percpu(vf->hw.lmt_info); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c new file mode 100644 index 000000000000..d3a76c5ccda8 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c @@ -0,0 +1,1363 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Ethernet driver + * + * Copyright (C) 2023 Marvell. + * + */ +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/inetdevice.h> +#include <linux/bitfield.h> + +#include "otx2_common.h" +#include "cn10k.h" +#include "qos.h" + +#define OTX2_QOS_QID_INNER 0xFFFFU +#define OTX2_QOS_QID_NONE 0xFFFEU +#define OTX2_QOS_ROOT_CLASSID 0xFFFFFFFF +#define OTX2_QOS_CLASS_NONE 0 +#define OTX2_QOS_DEFAULT_PRIO 0xF +#define OTX2_QOS_INVALID_SQ 0xFFFF + +static void otx2_qos_update_tx_netdev_queues(struct otx2_nic *pfvf) +{ + struct otx2_hw *hw = &pfvf->hw; + int tx_queues, qos_txqs, err; + + qos_txqs = bitmap_weight(pfvf->qos.qos_sq_bmap, + OTX2_QOS_MAX_LEAF_NODES); + + tx_queues = hw->tx_queues + qos_txqs; + + err = netif_set_real_num_tx_queues(pfvf->netdev, tx_queues); + if (err) { + netdev_err(pfvf->netdev, + "Failed to set no of Tx queues: %d\n", tx_queues); + return; + } +} + +static void otx2_qos_get_regaddr(struct otx2_qos_node *node, + struct nix_txschq_config *cfg, + int index) +{ + if (node->level == NIX_TXSCH_LVL_SMQ) { + cfg->reg[index++] = NIX_AF_MDQX_PARENT(node->schq); + cfg->reg[index++] = NIX_AF_MDQX_SCHEDULE(node->schq); + cfg->reg[index++] = NIX_AF_MDQX_PIR(node->schq); + cfg->reg[index] = NIX_AF_MDQX_CIR(node->schq); + } else if (node->level == NIX_TXSCH_LVL_TL4) { + cfg->reg[index++] = NIX_AF_TL4X_PARENT(node->schq); + cfg->reg[index++] = NIX_AF_TL4X_SCHEDULE(node->schq); + cfg->reg[index++] = NIX_AF_TL4X_PIR(node->schq); + cfg->reg[index] = NIX_AF_TL4X_CIR(node->schq); + } else if (node->level == NIX_TXSCH_LVL_TL3) { + cfg->reg[index++] = NIX_AF_TL3X_PARENT(node->schq); + cfg->reg[index++] = NIX_AF_TL3X_SCHEDULE(node->schq); + cfg->reg[index++] = NIX_AF_TL3X_PIR(node->schq); + cfg->reg[index] = NIX_AF_TL3X_CIR(node->schq); + } else if (node->level == NIX_TXSCH_LVL_TL2) { + cfg->reg[index++] = NIX_AF_TL2X_PARENT(node->schq); + cfg->reg[index++] = NIX_AF_TL2X_SCHEDULE(node->schq); + cfg->reg[index++] = NIX_AF_TL2X_PIR(node->schq); + cfg->reg[index] = NIX_AF_TL2X_CIR(node->schq); + } +} + +static void otx2_config_sched_shaping(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct nix_txschq_config *cfg, + int *num_regs) +{ + u64 maxrate; + + otx2_qos_get_regaddr(node, cfg, *num_regs); + + /* configure parent txschq */ + cfg->regval[*num_regs] = node->parent->schq << 16; + (*num_regs)++; + + /* configure prio/quantum */ + if (node->qid == OTX2_QOS_QID_NONE) { + cfg->regval[*num_regs] = node->prio << 24 | + mtu_to_dwrr_weight(pfvf, pfvf->tx_max_pktlen); + (*num_regs)++; + return; + } + + /* configure priority */ + cfg->regval[*num_regs] = (node->schq - node->parent->prio_anchor) << 24; + (*num_regs)++; + + /* configure PIR */ + maxrate = (node->rate > node->ceil) ? node->rate : node->ceil; + + cfg->regval[*num_regs] = + otx2_get_txschq_rate_regval(pfvf, maxrate, 65536); + (*num_regs)++; + + /* Don't configure CIR when both CIR+PIR not supported + * On 96xx, CIR + PIR + RED_ALGO=STALL causes deadlock + */ + if (!test_bit(QOS_CIR_PIR_SUPPORT, &pfvf->hw.cap_flag)) + return; + + cfg->regval[*num_regs] = + otx2_get_txschq_rate_regval(pfvf, node->rate, 65536); + (*num_regs)++; +} + +static void __otx2_qos_txschq_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct nix_txschq_config *cfg) +{ + struct otx2_hw *hw = &pfvf->hw; + int num_regs = 0; + u8 level; + + level = node->level; + + /* program txschq registers */ + if (level == NIX_TXSCH_LVL_SMQ) { + cfg->reg[num_regs] = NIX_AF_SMQX_CFG(node->schq); + cfg->regval[num_regs] = ((u64)pfvf->tx_max_pktlen << 8) | + OTX2_MIN_MTU; + cfg->regval[num_regs] |= (0x20ULL << 51) | (0x80ULL << 39) | + (0x2ULL << 36); + num_regs++; + + otx2_config_sched_shaping(pfvf, node, cfg, &num_regs); + + } else if (level == NIX_TXSCH_LVL_TL4) { + otx2_config_sched_shaping(pfvf, node, cfg, &num_regs); + } else if (level == NIX_TXSCH_LVL_TL3) { + /* configure link cfg */ + if (level == pfvf->qos.link_cfg_lvl) { + cfg->reg[num_regs] = NIX_AF_TL3_TL2X_LINKX_CFG(node->schq, hw->tx_link); + cfg->regval[num_regs] = BIT_ULL(13) | BIT_ULL(12); + num_regs++; + } + + otx2_config_sched_shaping(pfvf, node, cfg, &num_regs); + } else if (level == NIX_TXSCH_LVL_TL2) { + /* configure link cfg */ + if (level == pfvf->qos.link_cfg_lvl) { + cfg->reg[num_regs] = NIX_AF_TL3_TL2X_LINKX_CFG(node->schq, hw->tx_link); + cfg->regval[num_regs] = BIT_ULL(13) | BIT_ULL(12); + num_regs++; + } + + /* check if node is root */ + if (node->qid == OTX2_QOS_QID_INNER && !node->parent) { + cfg->reg[num_regs] = NIX_AF_TL2X_SCHEDULE(node->schq); + cfg->regval[num_regs] = TXSCH_TL1_DFLT_RR_PRIO << 24 | + mtu_to_dwrr_weight(pfvf, + pfvf->tx_max_pktlen); + num_regs++; + goto txschq_cfg_out; + } + + otx2_config_sched_shaping(pfvf, node, cfg, &num_regs); + } + +txschq_cfg_out: + cfg->num_regs = num_regs; +} + +static int otx2_qos_txschq_set_parent_topology(struct otx2_nic *pfvf, + struct otx2_qos_node *parent) +{ + struct mbox *mbox = &pfvf->mbox; + struct nix_txschq_config *cfg; + int rc; + + if (parent->level == NIX_TXSCH_LVL_MDQ) + return 0; + + mutex_lock(&mbox->lock); + + cfg = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox); + if (!cfg) { + mutex_unlock(&mbox->lock); + return -ENOMEM; + } + + cfg->lvl = parent->level; + + if (parent->level == NIX_TXSCH_LVL_TL4) + cfg->reg[0] = NIX_AF_TL4X_TOPOLOGY(parent->schq); + else if (parent->level == NIX_TXSCH_LVL_TL3) + cfg->reg[0] = NIX_AF_TL3X_TOPOLOGY(parent->schq); + else if (parent->level == NIX_TXSCH_LVL_TL2) + cfg->reg[0] = NIX_AF_TL2X_TOPOLOGY(parent->schq); + else if (parent->level == NIX_TXSCH_LVL_TL1) + cfg->reg[0] = NIX_AF_TL1X_TOPOLOGY(parent->schq); + + cfg->regval[0] = (u64)parent->prio_anchor << 32; + if (parent->level == NIX_TXSCH_LVL_TL1) + cfg->regval[0] |= (u64)TXSCH_TL1_DFLT_RR_PRIO << 1; + + cfg->num_regs++; + + rc = otx2_sync_mbox_msg(&pfvf->mbox); + + mutex_unlock(&mbox->lock); + + return rc; +} + +static void otx2_qos_free_hw_node_schq(struct otx2_nic *pfvf, + struct otx2_qos_node *parent) +{ + struct otx2_qos_node *node; + + list_for_each_entry_reverse(node, &parent->child_schq_list, list) + otx2_txschq_free_one(pfvf, node->level, node->schq); +} + +static void otx2_qos_free_hw_node(struct otx2_nic *pfvf, + struct otx2_qos_node *parent) +{ + struct otx2_qos_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &parent->child_list, list) { + otx2_qos_free_hw_node(pfvf, node); + otx2_qos_free_hw_node_schq(pfvf, node); + otx2_txschq_free_one(pfvf, node->level, node->schq); + } +} + +static void otx2_qos_free_hw_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *node) +{ + mutex_lock(&pfvf->qos.qos_lock); + + /* free child node hw mappings */ + otx2_qos_free_hw_node(pfvf, node); + otx2_qos_free_hw_node_schq(pfvf, node); + + /* free node hw mappings */ + otx2_txschq_free_one(pfvf, node->level, node->schq); + + mutex_unlock(&pfvf->qos.qos_lock); +} + +static void otx2_qos_sw_node_delete(struct otx2_nic *pfvf, + struct otx2_qos_node *node) +{ + hash_del_rcu(&node->hlist); + + if (node->qid != OTX2_QOS_QID_INNER && node->qid != OTX2_QOS_QID_NONE) { + __clear_bit(node->qid, pfvf->qos.qos_sq_bmap); + otx2_qos_update_tx_netdev_queues(pfvf); + } + + list_del(&node->list); + kfree(node); +} + +static void otx2_qos_free_sw_node_schq(struct otx2_nic *pfvf, + struct otx2_qos_node *parent) +{ + struct otx2_qos_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &parent->child_schq_list, list) { + list_del(&node->list); + kfree(node); + } +} + +static void __otx2_qos_free_sw_node(struct otx2_nic *pfvf, + struct otx2_qos_node *parent) +{ + struct otx2_qos_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &parent->child_list, list) { + __otx2_qos_free_sw_node(pfvf, node); + otx2_qos_free_sw_node_schq(pfvf, node); + otx2_qos_sw_node_delete(pfvf, node); + } +} + +static void otx2_qos_free_sw_node(struct otx2_nic *pfvf, + struct otx2_qos_node *node) +{ + mutex_lock(&pfvf->qos.qos_lock); + + __otx2_qos_free_sw_node(pfvf, node); + otx2_qos_free_sw_node_schq(pfvf, node); + otx2_qos_sw_node_delete(pfvf, node); + + mutex_unlock(&pfvf->qos.qos_lock); +} + +static void otx2_qos_destroy_node(struct otx2_nic *pfvf, + struct otx2_qos_node *node) +{ + otx2_qos_free_hw_cfg(pfvf, node); + otx2_qos_free_sw_node(pfvf, node); +} + +static void otx2_qos_fill_cfg_schq(struct otx2_qos_node *parent, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *node; + + list_for_each_entry(node, &parent->child_schq_list, list) + cfg->schq[node->level]++; +} + +static void otx2_qos_fill_cfg_tl(struct otx2_qos_node *parent, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *node; + + list_for_each_entry(node, &parent->child_list, list) { + otx2_qos_fill_cfg_tl(node, cfg); + cfg->schq_contig[node->level]++; + otx2_qos_fill_cfg_schq(node, cfg); + } +} + +static void otx2_qos_prepare_txschq_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *parent, + struct otx2_qos_cfg *cfg) +{ + mutex_lock(&pfvf->qos.qos_lock); + otx2_qos_fill_cfg_tl(parent, cfg); + mutex_unlock(&pfvf->qos.qos_lock); +} + +static void otx2_qos_read_txschq_cfg_schq(struct otx2_qos_node *parent, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *node; + int cnt; + + list_for_each_entry(node, &parent->child_schq_list, list) { + cnt = cfg->dwrr_node_pos[node->level]; + cfg->schq_list[node->level][cnt] = node->schq; + cfg->schq[node->level]++; + cfg->dwrr_node_pos[node->level]++; + } +} + +static void otx2_qos_read_txschq_cfg_tl(struct otx2_qos_node *parent, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *node; + int cnt; + + list_for_each_entry(node, &parent->child_list, list) { + otx2_qos_read_txschq_cfg_tl(node, cfg); + cnt = cfg->static_node_pos[node->level]; + cfg->schq_contig_list[node->level][cnt] = node->schq; + cfg->schq_contig[node->level]++; + cfg->static_node_pos[node->level]++; + otx2_qos_read_txschq_cfg_schq(node, cfg); + } +} + +static void otx2_qos_read_txschq_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + mutex_lock(&pfvf->qos.qos_lock); + otx2_qos_read_txschq_cfg_tl(node, cfg); + mutex_unlock(&pfvf->qos.qos_lock); +} + +static struct otx2_qos_node * +otx2_qos_alloc_root(struct otx2_nic *pfvf) +{ + struct otx2_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->parent = NULL; + if (!is_otx2_vf(pfvf->pcifunc)) + node->level = NIX_TXSCH_LVL_TL1; + else + node->level = NIX_TXSCH_LVL_TL2; + + WRITE_ONCE(node->qid, OTX2_QOS_QID_INNER); + node->classid = OTX2_QOS_ROOT_CLASSID; + + hash_add_rcu(pfvf->qos.qos_hlist, &node->hlist, node->classid); + list_add_tail(&node->list, &pfvf->qos.qos_tree); + INIT_LIST_HEAD(&node->child_list); + INIT_LIST_HEAD(&node->child_schq_list); + + return node; +} + +static int otx2_qos_add_child_node(struct otx2_qos_node *parent, + struct otx2_qos_node *node) +{ + struct list_head *head = &parent->child_list; + struct otx2_qos_node *tmp_node; + struct list_head *tmp; + + for (tmp = head->next; tmp != head; tmp = tmp->next) { + tmp_node = list_entry(tmp, struct otx2_qos_node, list); + if (tmp_node->prio == node->prio) + return -EEXIST; + if (tmp_node->prio > node->prio) { + list_add_tail(&node->list, tmp); + return 0; + } + } + + list_add_tail(&node->list, head); + return 0; +} + +static int otx2_qos_alloc_txschq_node(struct otx2_nic *pfvf, + struct otx2_qos_node *node) +{ + struct otx2_qos_node *txschq_node, *parent, *tmp; + int lvl; + + parent = node; + for (lvl = node->level - 1; lvl >= NIX_TXSCH_LVL_MDQ; lvl--) { + txschq_node = kzalloc(sizeof(*txschq_node), GFP_KERNEL); + if (!txschq_node) + goto err_out; + + txschq_node->parent = parent; + txschq_node->level = lvl; + txschq_node->classid = OTX2_QOS_CLASS_NONE; + WRITE_ONCE(txschq_node->qid, OTX2_QOS_QID_NONE); + txschq_node->rate = 0; + txschq_node->ceil = 0; + txschq_node->prio = 0; + + mutex_lock(&pfvf->qos.qos_lock); + list_add_tail(&txschq_node->list, &node->child_schq_list); + mutex_unlock(&pfvf->qos.qos_lock); + + INIT_LIST_HEAD(&txschq_node->child_list); + INIT_LIST_HEAD(&txschq_node->child_schq_list); + parent = txschq_node; + } + + return 0; + +err_out: + list_for_each_entry_safe(txschq_node, tmp, &node->child_schq_list, + list) { + list_del(&txschq_node->list); + kfree(txschq_node); + } + return -ENOMEM; +} + +static struct otx2_qos_node * +otx2_qos_sw_create_leaf_node(struct otx2_nic *pfvf, + struct otx2_qos_node *parent, + u16 classid, u32 prio, u64 rate, u64 ceil, + u16 qid) +{ + struct otx2_qos_node *node; + int err; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->parent = parent; + node->level = parent->level - 1; + node->classid = classid; + WRITE_ONCE(node->qid, qid); + + node->rate = otx2_convert_rate(rate); + node->ceil = otx2_convert_rate(ceil); + node->prio = prio; + + __set_bit(qid, pfvf->qos.qos_sq_bmap); + + hash_add_rcu(pfvf->qos.qos_hlist, &node->hlist, classid); + + mutex_lock(&pfvf->qos.qos_lock); + err = otx2_qos_add_child_node(parent, node); + if (err) { + mutex_unlock(&pfvf->qos.qos_lock); + return ERR_PTR(err); + } + mutex_unlock(&pfvf->qos.qos_lock); + + INIT_LIST_HEAD(&node->child_list); + INIT_LIST_HEAD(&node->child_schq_list); + + err = otx2_qos_alloc_txschq_node(pfvf, node); + if (err) { + otx2_qos_sw_node_delete(pfvf, node); + return ERR_PTR(-ENOMEM); + } + + return node; +} + +static struct otx2_qos_node * +otx2_sw_node_find(struct otx2_nic *pfvf, u32 classid) +{ + struct otx2_qos_node *node = NULL; + + hash_for_each_possible(pfvf->qos.qos_hlist, node, hlist, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static struct otx2_qos_node * +otx2_sw_node_find_rcu(struct otx2_nic *pfvf, u32 classid) +{ + struct otx2_qos_node *node = NULL; + + hash_for_each_possible_rcu(pfvf->qos.qos_hlist, node, hlist, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +int otx2_get_txq_by_classid(struct otx2_nic *pfvf, u16 classid) +{ + struct otx2_qos_node *node; + u16 qid; + int res; + + node = otx2_sw_node_find_rcu(pfvf, classid); + if (!node) { + res = -ENOENT; + goto out; + } + qid = READ_ONCE(node->qid); + if (qid == OTX2_QOS_QID_INNER) { + res = -EINVAL; + goto out; + } + res = pfvf->hw.tx_queues + qid; +out: + return res; +} + +static int +otx2_qos_txschq_config(struct otx2_nic *pfvf, struct otx2_qos_node *node) +{ + struct mbox *mbox = &pfvf->mbox; + struct nix_txschq_config *req; + int rc; + + mutex_lock(&mbox->lock); + + req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox); + if (!req) { + mutex_unlock(&mbox->lock); + return -ENOMEM; + } + + req->lvl = node->level; + __otx2_qos_txschq_cfg(pfvf, node, req); + + rc = otx2_sync_mbox_msg(&pfvf->mbox); + + mutex_unlock(&mbox->lock); + + return rc; +} + +static int otx2_qos_txschq_alloc(struct otx2_nic *pfvf, + struct otx2_qos_cfg *cfg) +{ + struct nix_txsch_alloc_req *req; + struct nix_txsch_alloc_rsp *rsp; + struct mbox *mbox = &pfvf->mbox; + int lvl, rc, schq; + + mutex_lock(&mbox->lock); + req = otx2_mbox_alloc_msg_nix_txsch_alloc(&pfvf->mbox); + if (!req) { + mutex_unlock(&mbox->lock); + return -ENOMEM; + } + + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + req->schq[lvl] = cfg->schq[lvl]; + req->schq_contig[lvl] = cfg->schq_contig[lvl]; + } + + rc = otx2_sync_mbox_msg(&pfvf->mbox); + if (rc) { + mutex_unlock(&mbox->lock); + return rc; + } + + rsp = (struct nix_txsch_alloc_rsp *) + otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr); + + if (IS_ERR(rsp)) { + rc = PTR_ERR(rsp); + goto out; + } + + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + for (schq = 0; schq < rsp->schq_contig[lvl]; schq++) { + cfg->schq_contig_list[lvl][schq] = + rsp->schq_contig_list[lvl][schq]; + } + } + + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + for (schq = 0; schq < rsp->schq[lvl]; schq++) { + cfg->schq_list[lvl][schq] = + rsp->schq_list[lvl][schq]; + } + } + + pfvf->qos.link_cfg_lvl = rsp->link_cfg_lvl; + +out: + mutex_unlock(&mbox->lock); + return rc; +} + +static void otx2_qos_txschq_fill_cfg_schq(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *tmp; + int cnt; + + list_for_each_entry(tmp, &node->child_schq_list, list) { + cnt = cfg->dwrr_node_pos[tmp->level]; + tmp->schq = cfg->schq_list[tmp->level][cnt]; + cfg->dwrr_node_pos[tmp->level]++; + } +} + +static void otx2_qos_txschq_fill_cfg_tl(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *tmp; + int cnt; + + list_for_each_entry(tmp, &node->child_list, list) { + otx2_qos_txschq_fill_cfg_tl(pfvf, tmp, cfg); + cnt = cfg->static_node_pos[tmp->level]; + tmp->schq = cfg->schq_contig_list[tmp->level][cnt]; + if (cnt == 0) + node->prio_anchor = tmp->schq; + cfg->static_node_pos[tmp->level]++; + otx2_qos_txschq_fill_cfg_schq(pfvf, tmp, cfg); + } +} + +static void otx2_qos_txschq_fill_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + mutex_lock(&pfvf->qos.qos_lock); + otx2_qos_txschq_fill_cfg_tl(pfvf, node, cfg); + otx2_qos_txschq_fill_cfg_schq(pfvf, node, cfg); + mutex_unlock(&pfvf->qos.qos_lock); +} + +static int otx2_qos_txschq_push_cfg_schq(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *tmp; + int ret; + + list_for_each_entry(tmp, &node->child_schq_list, list) { + ret = otx2_qos_txschq_config(pfvf, tmp); + if (ret) + return -EIO; + ret = otx2_qos_txschq_set_parent_topology(pfvf, tmp->parent); + if (ret) + return -EIO; + } + + return 0; +} + +static int otx2_qos_txschq_push_cfg_tl(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *tmp; + int ret; + + list_for_each_entry(tmp, &node->child_list, list) { + ret = otx2_qos_txschq_push_cfg_tl(pfvf, tmp, cfg); + if (ret) + return -EIO; + ret = otx2_qos_txschq_config(pfvf, tmp); + if (ret) + return -EIO; + ret = otx2_qos_txschq_push_cfg_schq(pfvf, tmp, cfg); + if (ret) + return -EIO; + } + + ret = otx2_qos_txschq_set_parent_topology(pfvf, node); + if (ret) + return -EIO; + + return 0; +} + +static int otx2_qos_txschq_push_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + int ret; + + mutex_lock(&pfvf->qos.qos_lock); + ret = otx2_qos_txschq_push_cfg_tl(pfvf, node, cfg); + if (ret) + goto out; + ret = otx2_qos_txschq_push_cfg_schq(pfvf, node, cfg); +out: + mutex_unlock(&pfvf->qos.qos_lock); + return ret; +} + +static int otx2_qos_txschq_update_config(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + otx2_qos_txschq_fill_cfg(pfvf, node, cfg); + + return otx2_qos_txschq_push_cfg(pfvf, node, cfg); +} + +static int otx2_qos_txschq_update_root_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *root, + struct otx2_qos_cfg *cfg) +{ + root->schq = cfg->schq_list[root->level][0]; + return otx2_qos_txschq_config(pfvf, root); +} + +static void otx2_qos_free_cfg(struct otx2_nic *pfvf, struct otx2_qos_cfg *cfg) +{ + int lvl, idx, schq; + + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + for (idx = 0; idx < cfg->schq[lvl]; idx++) { + schq = cfg->schq_list[lvl][idx]; + otx2_txschq_free_one(pfvf, lvl, schq); + } + } + + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + for (idx = 0; idx < cfg->schq_contig[lvl]; idx++) { + schq = cfg->schq_contig_list[lvl][idx]; + otx2_txschq_free_one(pfvf, lvl, schq); + } + } +} + +static void otx2_qos_enadis_sq(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + u16 qid) +{ + if (pfvf->qos.qid_to_sqmap[qid] != OTX2_QOS_INVALID_SQ) + otx2_qos_disable_sq(pfvf, qid); + + pfvf->qos.qid_to_sqmap[qid] = node->schq; + otx2_qos_enable_sq(pfvf, qid); +} + +static void otx2_qos_update_smq_schq(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + bool action) +{ + struct otx2_qos_node *tmp; + + if (node->qid == OTX2_QOS_QID_INNER) + return; + + list_for_each_entry(tmp, &node->child_schq_list, list) { + if (tmp->level == NIX_TXSCH_LVL_MDQ) { + if (action == QOS_SMQ_FLUSH) + otx2_smq_flush(pfvf, tmp->schq); + else + otx2_qos_enadis_sq(pfvf, tmp, node->qid); + } + } +} + +static void __otx2_qos_update_smq(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + bool action) +{ + struct otx2_qos_node *tmp; + + list_for_each_entry(tmp, &node->child_list, list) { + __otx2_qos_update_smq(pfvf, tmp, action); + if (tmp->qid == OTX2_QOS_QID_INNER) + continue; + if (tmp->level == NIX_TXSCH_LVL_MDQ) { + if (action == QOS_SMQ_FLUSH) + otx2_smq_flush(pfvf, tmp->schq); + else + otx2_qos_enadis_sq(pfvf, tmp, tmp->qid); + } else { + otx2_qos_update_smq_schq(pfvf, tmp, action); + } + } +} + +static void otx2_qos_update_smq(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + bool action) +{ + mutex_lock(&pfvf->qos.qos_lock); + __otx2_qos_update_smq(pfvf, node, action); + otx2_qos_update_smq_schq(pfvf, node, action); + mutex_unlock(&pfvf->qos.qos_lock); +} + +static int otx2_qos_push_txschq_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + int ret; + + ret = otx2_qos_txschq_alloc(pfvf, cfg); + if (ret) + return -ENOSPC; + + if (!(pfvf->netdev->flags & IFF_UP)) { + otx2_qos_txschq_fill_cfg(pfvf, node, cfg); + return 0; + } + + ret = otx2_qos_txschq_update_config(pfvf, node, cfg); + if (ret) { + otx2_qos_free_cfg(pfvf, cfg); + return -EIO; + } + + otx2_qos_update_smq(pfvf, node, QOS_CFG_SQ); + + return 0; +} + +static int otx2_qos_update_tree(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + otx2_qos_prepare_txschq_cfg(pfvf, node->parent, cfg); + return otx2_qos_push_txschq_cfg(pfvf, node->parent, cfg); +} + +static int otx2_qos_root_add(struct otx2_nic *pfvf, u16 htb_maj_id, u16 htb_defcls, + struct netlink_ext_ack *extack) +{ + struct otx2_qos_cfg *new_cfg; + struct otx2_qos_node *root; + int err; + + netdev_dbg(pfvf->netdev, + "TC_HTB_CREATE: handle=0x%x defcls=0x%x\n", + htb_maj_id, htb_defcls); + + root = otx2_qos_alloc_root(pfvf); + if (IS_ERR(root)) { + err = PTR_ERR(root); + return err; + } + + /* allocate txschq queue */ + new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL); + if (!new_cfg) { + NL_SET_ERR_MSG_MOD(extack, "Memory allocation error"); + err = -ENOMEM; + goto free_root_node; + } + /* allocate htb root node */ + new_cfg->schq[root->level] = 1; + err = otx2_qos_txschq_alloc(pfvf, new_cfg); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error allocating txschq"); + goto free_root_node; + } + + if (!(pfvf->netdev->flags & IFF_UP) || + root->level == NIX_TXSCH_LVL_TL1) { + root->schq = new_cfg->schq_list[root->level][0]; + goto out; + } + + /* update the txschq configuration in hw */ + err = otx2_qos_txschq_update_root_cfg(pfvf, root, new_cfg); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Error updating txschq configuration"); + goto txschq_free; + } + +out: + WRITE_ONCE(pfvf->qos.defcls, htb_defcls); + /* Pairs with smp_load_acquire() in ndo_select_queue */ + smp_store_release(&pfvf->qos.maj_id, htb_maj_id); + kfree(new_cfg); + return 0; + +txschq_free: + otx2_qos_free_cfg(pfvf, new_cfg); +free_root_node: + kfree(new_cfg); + otx2_qos_sw_node_delete(pfvf, root); + return err; +} + +static int otx2_qos_root_destroy(struct otx2_nic *pfvf) +{ + struct otx2_qos_node *root; + + netdev_dbg(pfvf->netdev, "TC_HTB_DESTROY\n"); + + /* find root node */ + root = otx2_sw_node_find(pfvf, OTX2_QOS_ROOT_CLASSID); + if (!root) + return -ENOENT; + + /* free the hw mappings */ + otx2_qos_destroy_node(pfvf, root); + + return 0; +} + +static int otx2_qos_validate_configuration(struct otx2_qos_node *parent, + struct netlink_ext_ack *extack, + struct otx2_nic *pfvf, + u64 prio) +{ + if (test_bit(prio, parent->prio_bmap)) { + NL_SET_ERR_MSG_MOD(extack, + "Static priority child with same priority exists"); + return -EEXIST; + } + + if (prio == TXSCH_TL1_DFLT_RR_PRIO) { + NL_SET_ERR_MSG_MOD(extack, + "Priority is reserved for Round Robin"); + return -EINVAL; + } + + return 0; +} + +static int otx2_qos_leaf_alloc_queue(struct otx2_nic *pfvf, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + u64 prio, struct netlink_ext_ack *extack) +{ + struct otx2_qos_cfg *old_cfg, *new_cfg; + struct otx2_qos_node *node, *parent; + int qid, ret, err; + + netdev_dbg(pfvf->netdev, + "TC_HTB_LEAF_ALLOC_QUEUE: classid=0x%x parent_classid=0x%x rate=%lld ceil=%lld prio=%lld\n", + classid, parent_classid, rate, ceil, prio); + + if (prio > OTX2_QOS_MAX_PRIO) { + NL_SET_ERR_MSG_MOD(extack, "Valid priority range 0 to 7"); + ret = -EOPNOTSUPP; + goto out; + } + + /* get parent node */ + parent = otx2_sw_node_find(pfvf, parent_classid); + if (!parent) { + NL_SET_ERR_MSG_MOD(extack, "parent node not found"); + ret = -ENOENT; + goto out; + } + if (parent->level == NIX_TXSCH_LVL_MDQ) { + NL_SET_ERR_MSG_MOD(extack, "HTB qos max levels reached"); + ret = -EOPNOTSUPP; + goto out; + } + + ret = otx2_qos_validate_configuration(parent, extack, pfvf, prio); + if (ret) + goto out; + + set_bit(prio, parent->prio_bmap); + + /* read current txschq configuration */ + old_cfg = kzalloc(sizeof(*old_cfg), GFP_KERNEL); + if (!old_cfg) { + NL_SET_ERR_MSG_MOD(extack, "Memory allocation error"); + ret = -ENOMEM; + goto reset_prio; + } + otx2_qos_read_txschq_cfg(pfvf, parent, old_cfg); + + /* allocate a new sq */ + qid = otx2_qos_get_qid(pfvf); + if (qid < 0) { + NL_SET_ERR_MSG_MOD(extack, "Reached max supported QOS SQ's"); + ret = -ENOMEM; + goto free_old_cfg; + } + + /* Actual SQ mapping will be updated after SMQ alloc */ + pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ; + + /* allocate and initialize a new child node */ + node = otx2_qos_sw_create_leaf_node(pfvf, parent, classid, prio, rate, + ceil, qid); + if (IS_ERR(node)) { + NL_SET_ERR_MSG_MOD(extack, "Unable to allocate leaf node"); + ret = PTR_ERR(node); + goto free_old_cfg; + } + + /* push new txschq config to hw */ + new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL); + if (!new_cfg) { + NL_SET_ERR_MSG_MOD(extack, "Memory allocation error"); + ret = -ENOMEM; + goto free_node; + } + ret = otx2_qos_update_tree(pfvf, node, new_cfg); + if (ret) { + NL_SET_ERR_MSG_MOD(extack, "HTB HW configuration error"); + kfree(new_cfg); + otx2_qos_sw_node_delete(pfvf, node); + /* restore the old qos tree */ + err = otx2_qos_txschq_update_config(pfvf, parent, old_cfg); + if (err) { + netdev_err(pfvf->netdev, + "Failed to restore txcshq configuration"); + goto free_old_cfg; + } + + otx2_qos_update_smq(pfvf, parent, QOS_CFG_SQ); + goto free_old_cfg; + } + + /* update tx_real_queues */ + otx2_qos_update_tx_netdev_queues(pfvf); + + /* free new txschq config */ + kfree(new_cfg); + + /* free old txschq config */ + otx2_qos_free_cfg(pfvf, old_cfg); + kfree(old_cfg); + + return pfvf->hw.tx_queues + qid; + +free_node: + otx2_qos_sw_node_delete(pfvf, node); +free_old_cfg: + kfree(old_cfg); +reset_prio: + clear_bit(prio, parent->prio_bmap); +out: + return ret; +} + +static int otx2_qos_leaf_to_inner(struct otx2_nic *pfvf, u16 classid, + u16 child_classid, u64 rate, u64 ceil, u64 prio, + struct netlink_ext_ack *extack) +{ + struct otx2_qos_cfg *old_cfg, *new_cfg; + struct otx2_qos_node *node, *child; + int ret, err; + u16 qid; + + netdev_dbg(pfvf->netdev, + "TC_HTB_LEAF_TO_INNER classid %04x, child %04x, rate %llu, ceil %llu\n", + classid, child_classid, rate, ceil); + + if (prio > OTX2_QOS_MAX_PRIO) { + NL_SET_ERR_MSG_MOD(extack, "Valid priority range 0 to 7"); + ret = -EOPNOTSUPP; + goto out; + } + + /* find node related to classid */ + node = otx2_sw_node_find(pfvf, classid); + if (!node) { + NL_SET_ERR_MSG_MOD(extack, "HTB node not found"); + ret = -ENOENT; + goto out; + } + /* check max qos txschq level */ + if (node->level == NIX_TXSCH_LVL_MDQ) { + NL_SET_ERR_MSG_MOD(extack, "HTB qos level not supported"); + ret = -EOPNOTSUPP; + goto out; + } + + set_bit(prio, node->prio_bmap); + + /* store the qid to assign to leaf node */ + qid = node->qid; + + /* read current txschq configuration */ + old_cfg = kzalloc(sizeof(*old_cfg), GFP_KERNEL); + if (!old_cfg) { + NL_SET_ERR_MSG_MOD(extack, "Memory allocation error"); + ret = -ENOMEM; + goto reset_prio; + } + otx2_qos_read_txschq_cfg(pfvf, node, old_cfg); + + /* delete the txschq nodes allocated for this node */ + otx2_qos_free_sw_node_schq(pfvf, node); + + /* mark this node as htb inner node */ + WRITE_ONCE(node->qid, OTX2_QOS_QID_INNER); + + /* allocate and initialize a new child node */ + child = otx2_qos_sw_create_leaf_node(pfvf, node, child_classid, + prio, rate, ceil, qid); + if (IS_ERR(child)) { + NL_SET_ERR_MSG_MOD(extack, "Unable to allocate leaf node"); + ret = PTR_ERR(child); + goto free_old_cfg; + } + + /* push new txschq config to hw */ + new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL); + if (!new_cfg) { + NL_SET_ERR_MSG_MOD(extack, "Memory allocation error"); + ret = -ENOMEM; + goto free_node; + } + ret = otx2_qos_update_tree(pfvf, child, new_cfg); + if (ret) { + NL_SET_ERR_MSG_MOD(extack, "HTB HW configuration error"); + kfree(new_cfg); + otx2_qos_sw_node_delete(pfvf, child); + /* restore the old qos tree */ + WRITE_ONCE(node->qid, qid); + err = otx2_qos_alloc_txschq_node(pfvf, node); + if (err) { + netdev_err(pfvf->netdev, + "Failed to restore old leaf node"); + goto free_old_cfg; + } + err = otx2_qos_txschq_update_config(pfvf, node, old_cfg); + if (err) { + netdev_err(pfvf->netdev, + "Failed to restore txcshq configuration"); + goto free_old_cfg; + } + otx2_qos_update_smq(pfvf, node, QOS_CFG_SQ); + goto free_old_cfg; + } + + /* free new txschq config */ + kfree(new_cfg); + + /* free old txschq config */ + otx2_qos_free_cfg(pfvf, old_cfg); + kfree(old_cfg); + + return 0; + +free_node: + otx2_qos_sw_node_delete(pfvf, child); +free_old_cfg: + kfree(old_cfg); +reset_prio: + clear_bit(prio, node->prio_bmap); +out: + return ret; +} + +static int otx2_qos_leaf_del(struct otx2_nic *pfvf, u16 *classid, + struct netlink_ext_ack *extack) +{ + struct otx2_qos_node *node, *parent; + u64 prio; + u16 qid; + + netdev_dbg(pfvf->netdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid); + + /* find node related to classid */ + node = otx2_sw_node_find(pfvf, *classid); + if (!node) { + NL_SET_ERR_MSG_MOD(extack, "HTB node not found"); + return -ENOENT; + } + parent = node->parent; + prio = node->prio; + qid = node->qid; + + otx2_qos_disable_sq(pfvf, node->qid); + + otx2_qos_destroy_node(pfvf, node); + pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ; + + clear_bit(prio, parent->prio_bmap); + + return 0; +} + +static int otx2_qos_leaf_del_last(struct otx2_nic *pfvf, u16 classid, bool force, + struct netlink_ext_ack *extack) +{ + struct otx2_qos_node *node, *parent; + struct otx2_qos_cfg *new_cfg; + u64 prio; + int err; + u16 qid; + + netdev_dbg(pfvf->netdev, + "TC_HTB_LEAF_DEL_LAST classid %04x\n", classid); + + /* find node related to classid */ + node = otx2_sw_node_find(pfvf, classid); + if (!node) { + NL_SET_ERR_MSG_MOD(extack, "HTB node not found"); + return -ENOENT; + } + + /* save qid for use by parent */ + qid = node->qid; + prio = node->prio; + + parent = otx2_sw_node_find(pfvf, node->parent->classid); + if (!parent) { + NL_SET_ERR_MSG_MOD(extack, "parent node not found"); + return -ENOENT; + } + + /* destroy the leaf node */ + otx2_qos_destroy_node(pfvf, node); + pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ; + + clear_bit(prio, parent->prio_bmap); + + /* create downstream txschq entries to parent */ + err = otx2_qos_alloc_txschq_node(pfvf, parent); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "HTB failed to create txsch configuration"); + return err; + } + WRITE_ONCE(parent->qid, qid); + __set_bit(qid, pfvf->qos.qos_sq_bmap); + + /* push new txschq config to hw */ + new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL); + if (!new_cfg) { + NL_SET_ERR_MSG_MOD(extack, "Memory allocation error"); + return -ENOMEM; + } + /* fill txschq cfg and push txschq cfg to hw */ + otx2_qos_fill_cfg_schq(parent, new_cfg); + err = otx2_qos_push_txschq_cfg(pfvf, parent, new_cfg); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "HTB HW configuration error"); + kfree(new_cfg); + return err; + } + kfree(new_cfg); + + /* update tx_real_queues */ + otx2_qos_update_tx_netdev_queues(pfvf); + + return 0; +} + +void otx2_clean_qos_queues(struct otx2_nic *pfvf) +{ + struct otx2_qos_node *root; + + root = otx2_sw_node_find(pfvf, OTX2_QOS_ROOT_CLASSID); + if (!root) + return; + + otx2_qos_update_smq(pfvf, root, QOS_SMQ_FLUSH); +} + +void otx2_qos_config_txschq(struct otx2_nic *pfvf) +{ + struct otx2_qos_node *root; + int err; + + root = otx2_sw_node_find(pfvf, OTX2_QOS_ROOT_CLASSID); + if (!root) + return; + + err = otx2_qos_txschq_config(pfvf, root); + if (err) { + netdev_err(pfvf->netdev, "Error update txschq configuration\n"); + goto root_destroy; + } + + err = otx2_qos_txschq_push_cfg_tl(pfvf, root, NULL); + if (err) { + netdev_err(pfvf->netdev, "Error update txschq configuration\n"); + goto root_destroy; + } + + otx2_qos_update_smq(pfvf, root, QOS_CFG_SQ); + return; + +root_destroy: + netdev_err(pfvf->netdev, "Failed to update Scheduler/Shaping config in Hardware\n"); + /* Free resources allocated */ + otx2_qos_root_destroy(pfvf); +} + +int otx2_setup_tc_htb(struct net_device *ndev, struct tc_htb_qopt_offload *htb) +{ + struct otx2_nic *pfvf = netdev_priv(ndev); + int res; + + switch (htb->command) { + case TC_HTB_CREATE: + return otx2_qos_root_add(pfvf, htb->parent_classid, + htb->classid, htb->extack); + case TC_HTB_DESTROY: + return otx2_qos_root_destroy(pfvf); + case TC_HTB_LEAF_ALLOC_QUEUE: + res = otx2_qos_leaf_alloc_queue(pfvf, htb->classid, + htb->parent_classid, + htb->rate, htb->ceil, + htb->prio, htb->extack); + if (res < 0) + return res; + htb->qid = res; + return 0; + case TC_HTB_LEAF_TO_INNER: + return otx2_qos_leaf_to_inner(pfvf, htb->parent_classid, + htb->classid, htb->rate, + htb->ceil, htb->prio, + htb->extack); + case TC_HTB_LEAF_DEL: + return otx2_qos_leaf_del(pfvf, &htb->classid, htb->extack); + case TC_HTB_LEAF_DEL_LAST: + case TC_HTB_LEAF_DEL_LAST_FORCE: + return otx2_qos_leaf_del_last(pfvf, htb->classid, + htb->command == TC_HTB_LEAF_DEL_LAST_FORCE, + htb->extack); + case TC_HTB_LEAF_QUERY_QUEUE: + res = otx2_get_txq_by_classid(pfvf, htb->classid); + htb->qid = res; + return 0; + case TC_HTB_NODE_MODIFY: + fallthrough; + default: + return -EOPNOTSUPP; + } +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.h b/drivers/net/ethernet/marvell/octeontx2/nic/qos.h new file mode 100644 index 000000000000..19773284be27 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell RVU Ethernet driver + * + * Copyright (C) 2023 Marvell. + * + */ +#ifndef OTX2_QOS_H +#define OTX2_QOS_H + +#include <linux/types.h> +#include <linux/netdevice.h> +#include <linux/rhashtable.h> + +#define OTX2_QOS_MAX_LVL 4 +#define OTX2_QOS_MAX_PRIO 7 +#define OTX2_QOS_MAX_LEAF_NODES 16 + +enum qos_smq_operations { + QOS_CFG_SQ, + QOS_SMQ_FLUSH, +}; + +u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic, u64 maxrate, u32 burst); + +int otx2_setup_tc_htb(struct net_device *ndev, struct tc_htb_qopt_offload *htb); +int otx2_qos_get_qid(struct otx2_nic *pfvf); +void otx2_qos_free_qid(struct otx2_nic *pfvf, int qidx); +int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx); +void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx); + +struct otx2_qos_cfg { + u16 schq[NIX_TXSCH_LVL_CNT]; + u16 schq_contig[NIX_TXSCH_LVL_CNT]; + int static_node_pos[NIX_TXSCH_LVL_CNT]; + int dwrr_node_pos[NIX_TXSCH_LVL_CNT]; + u16 schq_contig_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; + u16 schq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; +}; + +struct otx2_qos { + DECLARE_HASHTABLE(qos_hlist, order_base_2(OTX2_QOS_MAX_LEAF_NODES)); + struct mutex qos_lock; /* child list lock */ + u16 qid_to_sqmap[OTX2_QOS_MAX_LEAF_NODES]; + struct list_head qos_tree; + DECLARE_BITMAP(qos_sq_bmap, OTX2_QOS_MAX_LEAF_NODES); + u16 maj_id; + u16 defcls; + u8 link_cfg_lvl; /* LINKX_CFG CSRs mapped to TL3 or TL2's index ? */ +}; + +struct otx2_qos_node { + struct list_head list; /* list management */ + struct list_head child_list; + struct list_head child_schq_list; + struct hlist_node hlist; + DECLARE_BITMAP(prio_bmap, OTX2_QOS_MAX_PRIO + 1); + struct otx2_qos_node *parent; /* parent qos node */ + u64 rate; /* htb params */ + u64 ceil; + u32 classid; + u32 prio; + u16 schq; /* hw txschq */ + u16 qid; + u16 prio_anchor; + u8 level; +}; + + +#endif diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c new file mode 100644 index 000000000000..9d887bfc3108 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Physical Function ethernet driver + * + * Copyright (C) 2023 Marvell. + * + */ + +#include <linux/netdevice.h> +#include <net/tso.h> + +#include "cn10k.h" +#include "otx2_reg.h" +#include "otx2_common.h" +#include "otx2_txrx.h" +#include "otx2_struct.h" + +#define OTX2_QOS_MAX_LEAF_NODES 16 + +static void otx2_qos_aura_pool_free(struct otx2_nic *pfvf, int pool_id) +{ + struct otx2_pool *pool; + + if (!pfvf->qset.pool) + return; + + pool = &pfvf->qset.pool[pool_id]; + qmem_free(pfvf->dev, pool->stack); + qmem_free(pfvf->dev, pool->fc_addr); + pool->stack = NULL; + pool->fc_addr = NULL; +} + +static int otx2_qos_sq_aura_pool_init(struct otx2_nic *pfvf, int qidx) +{ + struct otx2_qset *qset = &pfvf->qset; + int pool_id, stack_pages, num_sqbs; + struct otx2_hw *hw = &pfvf->hw; + struct otx2_snd_queue *sq; + struct otx2_pool *pool; + dma_addr_t bufptr; + int err, ptr; + u64 iova, pa; + + /* Calculate number of SQBs needed. + * + * For a 128byte SQE, and 4K size SQB, 31 SQEs will fit in one SQB. + * Last SQE is used for pointing to next SQB. + */ + num_sqbs = (hw->sqb_size / 128) - 1; + num_sqbs = (qset->sqe_cnt + num_sqbs) / num_sqbs; + + /* Get no of stack pages needed */ + stack_pages = + (num_sqbs + hw->stack_pg_ptrs - 1) / hw->stack_pg_ptrs; + + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); + pool = &pfvf->qset.pool[pool_id]; + + /* Initialize aura context */ + err = otx2_aura_init(pfvf, pool_id, pool_id, num_sqbs); + if (err) + return err; + + /* Initialize pool context */ + err = otx2_pool_init(pfvf, pool_id, stack_pages, + num_sqbs, hw->sqb_size, AURA_NIX_SQ); + if (err) + goto aura_free; + + /* Flush accumulated messages */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) + goto pool_free; + + /* Allocate pointers and free them to aura/pool */ + sq = &qset->sq[qidx]; + sq->sqb_count = 0; + sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(*sq->sqb_ptrs), GFP_KERNEL); + if (!sq->sqb_ptrs) { + err = -ENOMEM; + goto pool_free; + } + + for (ptr = 0; ptr < num_sqbs; ptr++) { + err = otx2_alloc_rbuf(pfvf, pool, &bufptr); + if (err) + goto sqb_free; + pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); + sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; + } + + return 0; + +sqb_free: + while (ptr--) { + if (!sq->sqb_ptrs[ptr]) + continue; + iova = sq->sqb_ptrs[ptr]; + pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); + dma_unmap_page_attrs(pfvf->dev, iova, hw->sqb_size, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + put_page(virt_to_page(phys_to_virt(pa))); + otx2_aura_allocptr(pfvf, pool_id); + } + sq->sqb_count = 0; + kfree(sq->sqb_ptrs); +pool_free: + qmem_free(pfvf->dev, pool->stack); +aura_free: + qmem_free(pfvf->dev, pool->fc_addr); + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + return err; +} + +static void otx2_qos_sq_free_sqbs(struct otx2_nic *pfvf, int qidx) +{ + struct otx2_qset *qset = &pfvf->qset; + struct otx2_hw *hw = &pfvf->hw; + struct otx2_snd_queue *sq; + u64 iova, pa; + int sqb; + + sq = &qset->sq[qidx]; + if (!sq->sqb_ptrs) + return; + for (sqb = 0; sqb < sq->sqb_count; sqb++) { + if (!sq->sqb_ptrs[sqb]) + continue; + iova = sq->sqb_ptrs[sqb]; + pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); + dma_unmap_page_attrs(pfvf->dev, iova, hw->sqb_size, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + put_page(virt_to_page(phys_to_virt(pa))); + } + + sq->sqb_count = 0; + + sq = &qset->sq[qidx]; + qmem_free(pfvf->dev, sq->sqe); + qmem_free(pfvf->dev, sq->tso_hdrs); + kfree(sq->sg); + kfree(sq->sqb_ptrs); + qmem_free(pfvf->dev, sq->timestamps); + + memset((void *)sq, 0, sizeof(*sq)); +} + +/* send queue id */ +static void otx2_qos_sqb_flush(struct otx2_nic *pfvf, int qidx) +{ + int sqe_tail, sqe_head; + u64 incr, *ptr, val; + + ptr = (__force u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); + incr = (u64)qidx << 32; + val = otx2_atomic64_add(incr, ptr); + sqe_head = (val >> 20) & 0x3F; + sqe_tail = (val >> 28) & 0x3F; + if (sqe_head != sqe_tail) + usleep_range(50, 60); +} + +static int otx2_qos_ctx_disable(struct otx2_nic *pfvf, u16 qidx, int aura_id) +{ + struct nix_cn10k_aq_enq_req *cn10k_sq_aq; + struct npa_aq_enq_req *aura_aq; + struct npa_aq_enq_req *pool_aq; + struct nix_aq_enq_req *sq_aq; + + if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) { + cn10k_sq_aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox); + if (!cn10k_sq_aq) + return -ENOMEM; + cn10k_sq_aq->qidx = qidx; + cn10k_sq_aq->sq.ena = 0; + cn10k_sq_aq->sq_mask.ena = 1; + cn10k_sq_aq->ctype = NIX_AQ_CTYPE_SQ; + cn10k_sq_aq->op = NIX_AQ_INSTOP_WRITE; + } else { + sq_aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox); + if (!sq_aq) + return -ENOMEM; + sq_aq->qidx = qidx; + sq_aq->sq.ena = 0; + sq_aq->sq_mask.ena = 1; + sq_aq->ctype = NIX_AQ_CTYPE_SQ; + sq_aq->op = NIX_AQ_INSTOP_WRITE; + } + + aura_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!aura_aq) { + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + return -ENOMEM; + } + + aura_aq->aura_id = aura_id; + aura_aq->aura.ena = 0; + aura_aq->aura_mask.ena = 1; + aura_aq->ctype = NPA_AQ_CTYPE_AURA; + aura_aq->op = NPA_AQ_INSTOP_WRITE; + + pool_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!pool_aq) { + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + return -ENOMEM; + } + + pool_aq->aura_id = aura_id; + pool_aq->pool.ena = 0; + pool_aq->pool_mask.ena = 1; + + pool_aq->ctype = NPA_AQ_CTYPE_POOL; + pool_aq->op = NPA_AQ_INSTOP_WRITE; + + return otx2_sync_mbox_msg(&pfvf->mbox); +} + +int otx2_qos_get_qid(struct otx2_nic *pfvf) +{ + int qidx; + + qidx = find_first_zero_bit(pfvf->qos.qos_sq_bmap, + pfvf->hw.tc_tx_queues); + + return qidx == pfvf->hw.tc_tx_queues ? -ENOSPC : qidx; +} + +void otx2_qos_free_qid(struct otx2_nic *pfvf, int qidx) +{ + clear_bit(qidx, pfvf->qos.qos_sq_bmap); +} + +int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx) +{ + struct otx2_hw *hw = &pfvf->hw; + int pool_id, sq_idx, err; + + if (pfvf->flags & OTX2_FLAG_INTF_DOWN) + return -EPERM; + + sq_idx = hw->non_qos_queues + qidx; + + mutex_lock(&pfvf->mbox.lock); + err = otx2_qos_sq_aura_pool_init(pfvf, sq_idx); + if (err) + goto out; + + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, sq_idx); + err = otx2_sq_init(pfvf, sq_idx, pool_id); + if (err) + goto out; +out: + mutex_unlock(&pfvf->mbox.lock); + return err; +} + +void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx) +{ + struct otx2_qset *qset = &pfvf->qset; + struct otx2_hw *hw = &pfvf->hw; + struct otx2_snd_queue *sq; + struct otx2_cq_queue *cq; + int pool_id, sq_idx; + + sq_idx = hw->non_qos_queues + qidx; + + /* If the DOWN flag is set SQs are already freed */ + if (pfvf->flags & OTX2_FLAG_INTF_DOWN) + return; + + sq = &pfvf->qset.sq[sq_idx]; + if (!sq->sqb_ptrs) + return; + + if (sq_idx < hw->non_qos_queues || + sq_idx >= otx2_get_total_tx_queues(pfvf)) { + netdev_err(pfvf->netdev, "Send Queue is not a QoS queue\n"); + return; + } + + cq = &qset->cq[pfvf->hw.rx_queues + sq_idx]; + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, sq_idx); + + otx2_qos_sqb_flush(pfvf, sq_idx); + otx2_smq_flush(pfvf, otx2_get_smq_idx(pfvf, sq_idx)); + otx2_cleanup_tx_cqes(pfvf, cq); + + mutex_lock(&pfvf->mbox.lock); + otx2_qos_ctx_disable(pfvf, sq_idx, pool_id); + mutex_unlock(&pfvf->mbox.lock); + + otx2_qos_sq_free_sqbs(pfvf, sq_idx); + otx2_qos_aura_pool_free(pfvf, pool_id); +} diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c index 91a478b75cbf..3e20e71b0f81 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c @@ -148,6 +148,12 @@ static int prestera_flower_parse_meta(struct prestera_acl_rule *rule, __be16 key, mask; flow_rule_match_meta(f_rule, &match); + + if (match.mask->l2_miss) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\""); + return -EOPNOTSUPP; + } + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask"); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 9d504142e51a..4fb886c57cd7 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -300,8 +300,7 @@ static void prestera_pcs_get_state(struct phylink_pcs *pcs, } } -static int prestera_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, +static int prestera_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -316,30 +315,25 @@ static int prestera_pcs_config(struct phylink_pcs *pcs, cfg_mac.admin = true; cfg_mac.fec = PRESTERA_PORT_FEC_OFF; + cfg_mac.inband = neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED; switch (interface) { case PHY_INTERFACE_MODE_10GBASER: cfg_mac.speed = SPEED_10000; - cfg_mac.inband = 0; cfg_mac.mode = PRESTERA_MAC_MODE_SR_LR; break; case PHY_INTERFACE_MODE_2500BASEX: cfg_mac.speed = SPEED_2500; cfg_mac.duplex = DUPLEX_FULL; - cfg_mac.inband = test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - advertising); cfg_mac.mode = PRESTERA_MAC_MODE_SGMII; break; case PHY_INTERFACE_MODE_SGMII: - cfg_mac.inband = 1; cfg_mac.mode = PRESTERA_MAC_MODE_SGMII; break; case PHY_INTERFACE_MODE_1000BASEX: default: cfg_mac.speed = SPEED_1000; cfg_mac.duplex = DUPLEX_FULL; - cfg_mac.inband = test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - advertising); cfg_mac.mode = PRESTERA_MAC_MODE_1000BASE_X; break; } @@ -401,6 +395,7 @@ static int prestera_port_sfp_bind(struct prestera_port *port) continue; port->phylink_pcs.ops = &prestera_pcs_ops; + port->phylink_pcs.neg_mode = true; port->phy_config.dev = &port->dev->dev; port->phy_config.type = PHYLINK_NETDEV; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c index f328d957b2db..35857dc19542 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c @@ -727,7 +727,8 @@ pick_fw_ver: err = request_firmware_direct(&fw->bin, fw_path, fw->dev.dev); if (err) { - if (ver_maj == PRESTERA_SUPP_FW_MAJ_VER) { + if (ver_maj != PRESTERA_PREV_FW_MAJ_VER || + ver_min != PRESTERA_PREV_FW_MIN_VER) { ver_maj = PRESTERA_PREV_FW_MAJ_VER; ver_min = PRESTERA_PREV_FW_MIN_VER; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index a9a1028cb17b..de317179a7dc 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -166,11 +166,11 @@ prestera_util_neigh2nc_key(struct prestera_switch *sw, struct neighbour *n, static bool __prestera_fi_is_direct(struct fib_info *fi) { - struct fib_nh *fib_nh; + struct fib_nh_common *fib_nhc; if (fib_info_num_path(fi) == 1) { - fib_nh = fib_info_nh(fi, 0); - if (fib_nh->fib_nh_gw_family == AF_UNSPEC) + fib_nhc = fib_info_nhc(fi, 0); + if (fib_nhc->nhc_gw_family == AF_UNSPEC) return true; } @@ -261,7 +261,7 @@ static bool __prestera_util_kern_n_is_reachable_v4(u32 tb_id, __be32 *addr, struct net_device *dev) { - struct fib_nh *fib_nh; + struct fib_nh_common *fib_nhc; struct fib_result res; bool reachable; @@ -269,8 +269,8 @@ __prestera_util_kern_n_is_reachable_v4(u32 tb_id, __be32 *addr, if (!prestera_util_kern_get_route(&res, tb_id, addr)) if (prestera_fi_is_direct(res.fi)) { - fib_nh = fib_info_nh(res.fi, 0); - if (dev == fib_nh->fib_nh_dev) + fib_nhc = fib_info_nhc(res.fi, 0); + if (dev == fib_nhc->nhc_dev) reachable = true; } @@ -324,7 +324,7 @@ prestera_kern_fib_info_nhc(struct fib_notifier_info *info, int n) if (info->family == AF_INET) { fen4_info = container_of(info, struct fib_entry_notifier_info, info); - return &fib_info_nh(fen4_info->fi, n)->nh_common; + return fib_info_nhc(fen4_info->fi, n); } else if (info->family == AF_INET6) { fen6_info = container_of(info, struct fib6_entry_notifier_info, info); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 834c644b67db..7f0807672071 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2860,6 +2860,9 @@ static int mtk_hwlro_get_fdir_all(struct net_device *dev, int i; for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { + if (cnt == cmd->rule_cnt) + return -EMSGSIZE; + if (mac->hwlro_ip[i]) { rule_locs[cnt] = i; cnt++; @@ -3846,23 +3849,6 @@ static int mtk_hw_deinit(struct mtk_eth *eth) return 0; } -static int __init mtk_init(struct net_device *dev) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - int ret; - - ret = of_get_ethdev_address(mac->of_node, dev); - if (ret) { - /* If the mac address is invalid, use random mac address */ - eth_hw_addr_random(dev); - dev_err(eth->dev, "generated random MAC address %pM\n", - dev->dev_addr); - } - - return 0; -} - static void mtk_uninit(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); @@ -4278,7 +4264,6 @@ static const struct ethtool_ops mtk_ethtool_ops = { }; static const struct net_device_ops mtk_netdev_ops = { - .ndo_init = mtk_init, .ndo_uninit = mtk_uninit, .ndo_open = mtk_open, .ndo_stop = mtk_stop, @@ -4340,6 +4325,17 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) mac->hw = eth; mac->of_node = np; + err = of_get_ethdev_address(mac->of_node, eth->netdev[id]); + if (err == -EPROBE_DEFER) + return err; + + if (err) { + /* If the mac address is invalid, use random mac address */ + eth_hw_addr_random(eth->netdev[id]); + dev_err(eth->dev, "generated random MAC address %pM\n", + eth->netdev[id]->dev_addr); + } + memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip)); mac->hwlro_ip_cnt = 0; diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c index 316fe2e70fea..1a97feca77f2 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c @@ -98,7 +98,7 @@ mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind) acct = mtk_foe_entry_get_mib(ppe, i, NULL); - type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1); + type = mtk_get_ib1_pkt_type(ppe->eth, entry->ib1); seq_printf(m, "%05x %s %7s", i, mtk_foe_entry_state_str(state), mtk_foe_pkt_type_str(type)); diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 985cff910f30..3b651efcc25e 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -221,9 +221,13 @@ void mtk_wed_fe_reset(void) for (i = 0; i < ARRAY_SIZE(hw_list); i++) { struct mtk_wed_hw *hw = hw_list[i]; - struct mtk_wed_device *dev = hw->wed_dev; + struct mtk_wed_device *dev; int err; + if (!hw) + break; + + dev = hw->wed_dev; if (!dev || !dev->wlan.reset) continue; @@ -244,8 +248,12 @@ void mtk_wed_fe_reset_complete(void) for (i = 0; i < ARRAY_SIZE(hw_list); i++) { struct mtk_wed_hw *hw = hw_list[i]; - struct mtk_wed_device *dev = hw->wed_dev; + struct mtk_wed_device *dev; + + if (!hw) + break; + dev = hw->wed_dev; if (!dev || !dev->wlan.reset_complete) continue; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 277738c50c56..61286b0d9b0c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1374,16 +1374,13 @@ static int mlx4_mf_bond(struct mlx4_dev *dev) int nvfs; struct mlx4_slaves_pport slaves_port1; struct mlx4_slaves_pport slaves_port2; - DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX); slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1); slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2); - bitmap_and(slaves_port_1_2, - slaves_port1.slaves, slaves_port2.slaves, - dev->persist->num_vfs + 1); /* only single port vfs are allowed */ - if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) { + if (bitmap_weight_and(slaves_port1.slaves, slaves_port2.slaves, + dev->persist->num_vfs + 1) > 1) { mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n"); return -EINVAL; } @@ -3027,13 +3024,43 @@ no_msi: } } +static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port, + enum devlink_port_type port_type) +{ + struct mlx4_port_info *info = container_of(devlink_port, + struct mlx4_port_info, + devlink_port); + enum mlx4_port_type mlx4_port_type; + + switch (port_type) { + case DEVLINK_PORT_TYPE_AUTO: + mlx4_port_type = MLX4_PORT_TYPE_AUTO; + break; + case DEVLINK_PORT_TYPE_ETH: + mlx4_port_type = MLX4_PORT_TYPE_ETH; + break; + case DEVLINK_PORT_TYPE_IB: + mlx4_port_type = MLX4_PORT_TYPE_IB; + break; + default: + return -EOPNOTSUPP; + } + + return __set_port_type(info, mlx4_port_type); +} + +static const struct devlink_port_ops mlx4_devlink_port_ops = { + .port_type_set = mlx4_devlink_port_type_set, +}; + static int mlx4_init_port_info(struct mlx4_dev *dev, int port) { struct devlink *devlink = priv_to_devlink(mlx4_priv(dev)); struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; int err; - err = devl_port_register(devlink, &info->devlink_port, port); + err = devl_port_register_with_ops(devlink, &info->devlink_port, port, + &mlx4_devlink_port_ops); if (err) return err; @@ -3877,31 +3904,6 @@ err_disable_pdev: return err; } -static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port, - enum devlink_port_type port_type) -{ - struct mlx4_port_info *info = container_of(devlink_port, - struct mlx4_port_info, - devlink_port); - enum mlx4_port_type mlx4_port_type; - - switch (port_type) { - case DEVLINK_PORT_TYPE_AUTO: - mlx4_port_type = MLX4_PORT_TYPE_AUTO; - break; - case DEVLINK_PORT_TYPE_ETH: - mlx4_port_type = MLX4_PORT_TYPE_ETH; - break; - case DEVLINK_PORT_TYPE_IB: - mlx4_port_type = MLX4_PORT_TYPE_IB; - break; - default: - return -EOPNOTSUPP; - } - - return __set_port_type(info, mlx4_port_type); -} - static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink) { struct mlx4_priv *priv = devlink_priv(devlink); @@ -3986,7 +3988,6 @@ static int mlx4_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a } static const struct devlink_ops mlx4_devlink_ops = { - .port_type_set = mlx4_devlink_port_type_set, .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), .reload_down = mlx4_devlink_reload_down, .reload_up = mlx4_devlink_reload_up, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index ddf1e352f51d..35f00700a4d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -75,7 +75,8 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o -mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o esw/bridge_mcast.o en/rep/bridge.o +mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o esw/bridge_mcast.o esw/bridge_debugfs.o \ + en/rep/bridge.o mlx5_core-$(CONFIG_THERMAL) += thermal.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index bb95b40d25eb..2138f28a2931 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -246,6 +246,7 @@ void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev) debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages); debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.page_counters[MLX5_VF]); + debugfs_create_u32("fw_pages_ec_vfs", 0400, pages, &dev->priv.page_counters[MLX5_EC_VF]); debugfs_create_u32("fw_pages_sfs", 0400, pages, &dev->priv.page_counters[MLX5_SF]); debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.page_counters[MLX5_HOST_PF]); debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed); @@ -513,11 +514,11 @@ EXPORT_SYMBOL(mlx5_debug_qp_add); void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) { - if (!mlx5_debugfs_root) + if (!mlx5_debugfs_root || !qp->dbg) return; - if (qp->dbg) - rem_res_tree(qp->dbg); + rem_res_tree(qp->dbg); + qp->dbg = NULL; } EXPORT_SYMBOL(mlx5_debug_qp_remove); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 1b33533b15de..edb06fb9bbc5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -151,12 +151,6 @@ static bool is_ib_rep_supported(struct mlx5_core_dev *dev) if (!is_eth_rep_supported(dev)) return false; - if (!MLX5_ESWITCH_MANAGER(dev)) - return false; - - if (!is_mdev_switchdev_mode(dev)) - return false; - if (mlx5_core_mp_enabled(dev)) return false; @@ -323,6 +317,18 @@ static void del_adev(struct auxiliary_device *adev) auxiliary_device_uninit(adev); } +void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev) +{ + mutex_lock(&mlx5_intf_mutex); + dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; + mutex_unlock(&mlx5_intf_mutex); +} + +bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev) +{ + return dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; +} + int mlx5_attach_device(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; @@ -457,6 +463,10 @@ static int add_drivers(struct mlx5_core_dev *dev) if (priv->adev[i]) continue; + if (mlx5_adev_devices[i].is_enabled && + !(mlx5_adev_devices[i].is_enabled(dev))) + continue; + if (mlx5_adev_devices[i].is_supported) is_supported = mlx5_adev_devices[i].is_supported(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 4b607785d694..3d82ec890666 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -7,7 +7,6 @@ #include "fw_reset.h" #include "fs_core.h" #include "eswitch.h" -#include "lag/lag.h" #include "esw/qos.h" #include "sf/dev/dev.h" #include "sf/sf.h" @@ -142,6 +141,13 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, bool sf_dev_allocated; int ret = 0; + if (mlx5_dev_is_lightweight(dev)) { + if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) + return -EOPNOTSUPP; + mlx5_unload_one_light(dev); + return 0; + } + sf_dev_allocated = mlx5_sf_dev_allocated(dev); if (sf_dev_allocated) { /* Reload results in deleting SF device which further results in @@ -162,9 +168,8 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, return -EOPNOTSUPP; } - if (pci_num_vf(pdev)) { + if (mlx5_core_is_pf(dev) && pci_num_vf(pdev)) NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable"); - } switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: @@ -195,6 +200,10 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a *actions_performed = BIT(action); switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + if (mlx5_dev_is_lightweight(dev)) { + mlx5_fw_reporters_create(dev); + return mlx5_init_one_devl_locked(dev); + } ret = mlx5_load_one_devl_locked(dev, false); break; case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: @@ -311,8 +320,6 @@ static const struct devlink_ops mlx5_devlink_ops = { .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, - .port_function_hw_addr_get = mlx5_devlink_port_function_hw_addr_get, - .port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set, .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set, .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set, .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set, @@ -320,16 +327,9 @@ static const struct devlink_ops mlx5_devlink_ops = { .rate_node_new = mlx5_esw_devlink_rate_node_new, .rate_node_del = mlx5_esw_devlink_rate_node_del, .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set, - .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, - .port_fn_roce_set = mlx5_devlink_port_fn_roce_set, - .port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get, - .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set, #endif #ifdef CONFIG_MLX5_SF_MANAGER .port_new = mlx5_devlink_sf_port_new, - .port_del = mlx5_devlink_sf_port_del, - .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get, - .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set, #endif .flash_update = mlx5_devlink_flash_update, .info_get = mlx5_devlink_info_get, @@ -437,54 +437,6 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id return 0; } - -static int mlx5_devlink_esw_multiport_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - - if (!MLX5_ESWITCH_MANAGER(dev)) - return -EOPNOTSUPP; - - if (ctx->val.vbool) - return mlx5_lag_mpesw_enable(dev); - - mlx5_lag_mpesw_disable(dev); - return 0; -} - -static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - - if (!MLX5_ESWITCH_MANAGER(dev)) - return -EOPNOTSUPP; - - ctx->val.vbool = mlx5_lag_is_mpesw(dev); - return 0; -} - -static int mlx5_devlink_esw_multiport_validate(struct devlink *devlink, u32 id, - union devlink_param_value val, - struct netlink_ext_ack *extack) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - - if (!MLX5_ESWITCH_MANAGER(dev)) { - NL_SET_ERR_MSG_MOD(extack, "E-Switch is unsupported"); - return -EOPNOTSUPP; - } - - if (mlx5_eswitch_mode(dev) != MLX5_ESWITCH_OFFLOADS) { - NL_SET_ERR_MSG_MOD(extack, - "E-Switch must be in switchdev mode"); - return -EBUSY; - } - - return 0; -} - #endif static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, @@ -558,12 +510,6 @@ static const struct devlink_param mlx5_devlink_params[] = { BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_large_group_num_validate), - DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, - "esw_multiport", DEVLINK_PARAM_TYPE_BOOL, - BIT(DEVLINK_PARAM_CMODE_RUNTIME), - mlx5_devlink_esw_multiport_get, - mlx5_devlink_esw_multiport_set, - mlx5_devlink_esw_multiport_validate), #endif DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_eq_depth_validate), @@ -576,7 +522,7 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; - value.vbool = MLX5_CAP_GEN(dev, roce); + value.vbool = MLX5_CAP_GEN(dev, roce) && !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, value); @@ -626,7 +572,7 @@ static int mlx5_devlink_eth_params_register(struct devlink *devlink) if (err) return err; - value.vbool = true; + value.vbool = !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, value); @@ -666,6 +612,7 @@ static const struct devlink_param mlx5_devlink_rdma_params[] = { static int mlx5_devlink_rdma_params_register(struct devlink *devlink) { + struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; int err; @@ -677,7 +624,7 @@ static int mlx5_devlink_rdma_params_register(struct devlink *devlink) if (err) return err; - value.vbool = true; + value.vbool = !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, value); @@ -712,7 +659,7 @@ static int mlx5_devlink_vnet_params_register(struct devlink *devlink) if (err) return err; - value.vbool = true; + value.vbool = !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, value); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c index 9114661cd967..e869c65d8e90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c @@ -2,6 +2,7 @@ /* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */ #include "reporter_vnic.h" +#include "en_stats.h" #include "devlink.h" #define VNIC_ENV_GET64(vnic_env_stats, c) \ @@ -36,45 +37,72 @@ int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, if (err) return err; - err = devlink_fmsg_u64_pair_put(fmsg, "total_error_queues", - VNIC_ENV_GET64(&vnic, total_error_queues)); - if (err) - return err; - - err = devlink_fmsg_u64_pair_put(fmsg, "send_queue_priority_update_flow", - VNIC_ENV_GET64(&vnic, send_queue_priority_update_flow)); - if (err) - return err; - - err = devlink_fmsg_u64_pair_put(fmsg, "comp_eq_overrun", - VNIC_ENV_GET64(&vnic, comp_eq_overrun)); - if (err) - return err; - - err = devlink_fmsg_u64_pair_put(fmsg, "async_eq_overrun", - VNIC_ENV_GET64(&vnic, async_eq_overrun)); - if (err) - return err; - - err = devlink_fmsg_u64_pair_put(fmsg, "cq_overrun", - VNIC_ENV_GET64(&vnic, cq_overrun)); - if (err) - return err; - - err = devlink_fmsg_u64_pair_put(fmsg, "invalid_command", - VNIC_ENV_GET64(&vnic, invalid_command)); - if (err) - return err; - - err = devlink_fmsg_u64_pair_put(fmsg, "quota_exceeded_command", - VNIC_ENV_GET64(&vnic, quota_exceeded_command)); - if (err) - return err; - - err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard", - VNIC_ENV_GET64(&vnic, nic_receive_steering_discard)); - if (err) - return err; + if (MLX5_CAP_GEN(dev, vnic_env_queue_counters)) { + err = devlink_fmsg_u32_pair_put(fmsg, "total_error_queues", + VNIC_ENV_GET(&vnic, total_error_queues)); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "send_queue_priority_update_flow", + VNIC_ENV_GET(&vnic, + send_queue_priority_update_flow)); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, eq_overrun_count)) { + err = devlink_fmsg_u32_pair_put(fmsg, "comp_eq_overrun", + VNIC_ENV_GET(&vnic, comp_eq_overrun)); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "async_eq_overrun", + VNIC_ENV_GET(&vnic, async_eq_overrun)); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, vnic_env_cq_overrun)) { + err = devlink_fmsg_u32_pair_put(fmsg, "cq_overrun", + VNIC_ENV_GET(&vnic, cq_overrun)); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, invalid_command_count)) { + err = devlink_fmsg_u32_pair_put(fmsg, "invalid_command", + VNIC_ENV_GET(&vnic, invalid_command)); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, quota_exceeded_count)) { + err = devlink_fmsg_u32_pair_put(fmsg, "quota_exceeded_command", + VNIC_ENV_GET(&vnic, quota_exceeded_command)); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, nic_receive_steering_discard)) { + err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard", + VNIC_ENV_GET64(&vnic, + nic_receive_steering_discard)); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, vnic_env_cnt_steering_fail)) { + err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail", + VNIC_ENV_GET64(&vnic, + generated_pkt_steering_fail)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail", + VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail)); + if (err) + return err; + } err = devlink_fmsg_obj_nest_end(fmsg); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8e999f238194..b1807bfb815f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -165,15 +165,6 @@ struct page_pool; #define MLX5E_MAX_KLM_PER_WQE(mdev) \ MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev)) -#define MLX5E_MSG_LEVEL NETIF_MSG_LINK - -#define mlx5e_dbg(mlevel, priv, format, ...) \ -do { \ - if (NETIF_MSG_##mlevel & (priv)->msglevel) \ - netdev_warn(priv->netdev, format, \ - ##__VA_ARGS__); \ -} while (0) - #define mlx5e_state_dereference(priv, p) \ rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock)) @@ -594,13 +585,6 @@ struct mlx5e_mpw_info { #define MLX5E_MAX_RX_FRAGS 4 -/* a single cache unit is capable to serve one napi call (for non-striding rq) - * or a MPWQE (for striding rq). - */ -#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ - MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT) -#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) - struct mlx5e_rq; typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); typedef struct sk_buff * @@ -887,7 +871,6 @@ struct mlx5e_priv { #endif /* priv data path fields - end */ - u32 msglevel; unsigned long state; struct mutex state_lock; /* Protects Interface state */ struct mlx5e_rq drop_rq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index 03cb79adf912..be83ad9db82a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -594,7 +594,7 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs) err = fs_any_create_table(fs); if (err) - return err; + goto err_free_any; err = fs_any_enable(fs); if (err) @@ -606,8 +606,8 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs) err_destroy_table: fs_any_destroy_table(fs_any); - - kfree(fs_any); +err_free_any: mlx5e_fs_set_any(fs, NULL); + kfree(fs_any); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 7e8e96cc5cd0..8e25f4ef5ccc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -65,12 +65,13 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, MLX5_GET(bufferx_reg, buffer, xoff_threshold) * port_buff_cell_sz; total_used += port_buffer->buffer[i].size; - mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i, - port_buffer->buffer[i].size, - port_buffer->buffer[i].xon, - port_buffer->buffer[i].xoff, - port_buffer->buffer[i].epsb, - port_buffer->buffer[i].lossy); + netdev_dbg(priv->netdev, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", + i, + port_buffer->buffer[i].size, + port_buffer->buffer[i].xon, + port_buffer->buffer[i].xoff, + port_buffer->buffer[i].epsb, + port_buffer->buffer[i].lossy); } port_buffer->internal_buffers_size = 0; @@ -87,11 +88,11 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, port_buffer->internal_buffers_size - port_buffer->headroom_size; - mlx5e_dbg(HW, priv, - "total buffer size=%u, headroom buffer size=%u, internal buffers size=%u, spare buffer size=%u\n", - port_buffer->port_buffer_size, port_buffer->headroom_size, - port_buffer->internal_buffers_size, - port_buffer->spare_buffer_size); + netdev_dbg(priv->netdev, + "total buffer size=%u, headroom buffer size=%u, internal buffers size=%u, spare buffer size=%u\n", + port_buffer->port_buffer_size, port_buffer->headroom_size, + port_buffer->internal_buffers_size, + port_buffer->spare_buffer_size); out: kfree(out); return err; @@ -352,7 +353,7 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100; - mlx5e_dbg(HW, priv, "%s: xoff=%d\n", __func__, xoff); + netdev_dbg(priv->netdev, "%s: xoff=%d\n", __func__, xoff); return xoff; } @@ -484,6 +485,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u8 *prio2buffer) { u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; + struct net_device *netdev = priv->netdev; struct mlx5e_port_buffer port_buffer; u32 xoff = calculate_xoff(priv, mtu); bool update_prio2buffer = false; @@ -495,7 +497,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, int err; int i; - mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change); + netdev_dbg(netdev, "%s: change=%x\n", __func__, change); max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU); err = mlx5e_port_query_buffer(priv, &port_buffer); @@ -510,8 +512,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, } if (change & MLX5E_PORT_BUFFER_PFC) { - mlx5e_dbg(HW, priv, "%s: requested PFC per priority bitmask: 0x%x\n", - __func__, pfc->pfc_en); + netdev_dbg(netdev, "%s: requested PFC per priority bitmask: 0x%x\n", + __func__, pfc->pfc_en); err = mlx5e_port_query_priority2buffer(priv->mdev, buffer); if (err) return err; @@ -526,8 +528,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) { update_prio2buffer = true; for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) - mlx5e_dbg(HW, priv, "%s: requested to map prio[%d] to buffer %d\n", - __func__, i, prio2buffer[i]); + netdev_dbg(priv->netdev, "%s: requested to map prio[%d] to buffer %d\n", + __func__, i, prio2buffer[i]); err = fill_pfc_en(priv->mdev, &curr_pfc_en); if (err) @@ -541,10 +543,10 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_SIZE) { for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { - mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]); + netdev_dbg(priv->netdev, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]); if (!port_buffer.buffer[i].lossy && !buffer_size[i]) { - mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n", - __func__, i); + netdev_dbg(priv->netdev, "%s: lossless buffer[%d] size cannot be zero\n", + __func__, i); return -EINVAL; } @@ -552,7 +554,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, total_used += buffer_size[i]; } - mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used); + netdev_dbg(priv->netdev, "%s: total buffer requested=%d\n", __func__, total_used); if (total_used > port_buffer.headroom_size && (total_used - port_buffer.headroom_size) > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 3cbebfba582b..b0b429a0321e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -729,8 +729,10 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev))); cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL); - if (!c || !cparams) - return -ENOMEM; + if (!c || !cparams) { + err = -ENOMEM; + goto err_free; + } c->priv = priv; c->mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 2842195ee548..1874c2f0587f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -379,6 +379,12 @@ int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb_ if (!htb && htb_qopt->command != TC_HTB_CREATE) return -EINVAL; + if (htb_qopt->prio) { + NL_SET_ERR_MSG_MOD(htb_qopt->extack, + "prio parameter is not supported by device with HTB offload enabled."); + return -EOPNOTSUPP; + } + switch (htb_qopt->command) { case TC_HTB_CREATE: if (!mlx5_qos_is_supported(priv->mdev)) { @@ -515,4 +521,3 @@ int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_i *hw_id = rl->leaves_id[tc]; return 0; } - diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index fd191925ab4b..560800246573 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -136,7 +136,6 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr struct mlx5_eswitch *esw = br_offloads->esw; u16 vport_num, esw_owner_vhca_id; struct netlink_ext_ack *extack; - int ifindex = upper->ifindex; int err = 0; if (!netif_is_bridge_master(upper)) @@ -150,15 +149,15 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr if (mlx5_esw_bridge_is_local(dev, rep, esw)) err = info->linking ? - mlx5_esw_bridge_vport_link(ifindex, vport_num, esw_owner_vhca_id, + mlx5_esw_bridge_vport_link(upper, vport_num, esw_owner_vhca_id, br_offloads, extack) : - mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id, + mlx5_esw_bridge_vport_unlink(upper, vport_num, esw_owner_vhca_id, br_offloads, extack); else if (mlx5_esw_bridge_dev_same_hw(rep, esw)) err = info->linking ? - mlx5_esw_bridge_vport_peer_link(ifindex, vport_num, esw_owner_vhca_id, + mlx5_esw_bridge_vport_peer_link(upper, vport_num, esw_owner_vhca_id, br_offloads, extack) : - mlx5_esw_bridge_vport_peer_unlink(ifindex, vport_num, esw_owner_vhca_id, + mlx5_esw_bridge_vport_peer_unlink(upper, vport_num, esw_owner_vhca_id, br_offloads, extack); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c index 92d3952dfa8b..feeb41693c17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -17,8 +17,10 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, if (err) return err; - if (mlx5e_is_eswitch_flow(parse_state->flow)) + if (mlx5e_is_eswitch_flow(parse_state->flow)) { attr->esw_attr->split_count = attr->esw_attr->out_count; + parse_state->if_count = 0; + } attr->flags |= MLX5_ATTR_FLAG_CT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index 291193f7120d..f63402c48028 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -294,6 +294,7 @@ parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state, if (err) return err; + parse_state->if_count = 0; esw_attr->out_count++; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c index 3b272bbf4c53..368a95fa77d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c @@ -98,8 +98,10 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state, attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { esw_attr->split_count = esw_attr->out_count; + parse_state->if_count = 0; + } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c index ad09a8a5f36e..2d1d4a04501b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c @@ -66,6 +66,7 @@ tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, if (err) return err; + parse_state->if_count = 0; esw_attr->out_count++; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c index c8a3eaf189f6..a13c5e707b83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -166,6 +166,7 @@ tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, return err; esw_attr->split_count = esw_attr->out_count; + parse_state->if_count = 0; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c index 310b99230760..f17575b09788 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c @@ -65,8 +65,10 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, if (err) return err; - if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { attr->esw_attr->split_count = attr->esw_attr->out_count; + parse_state->if_count = 0; + } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c index 07c1895a2b23..7aa926e542d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c @@ -25,8 +25,8 @@ struct mlx5e_tc_act_stats { static const struct rhashtable_params act_counters_ht_params = { .head_offset = offsetof(struct mlx5e_tc_act_stats, hash), - .key_offset = 0, - .key_len = offsetof(struct mlx5e_tc_act_stats, counter), + .key_offset = offsetof(struct mlx5e_tc_act_stats, tc_act_cookie), + .key_len = sizeof_field(struct mlx5e_tc_act_stats, tc_act_cookie), .automatic_shrinking = true, }; @@ -169,14 +169,11 @@ mlx5e_tc_act_stats_fill_stats(struct mlx5e_tc_act_stats_handle *handle, { struct rhashtable *ht = &handle->ht; struct mlx5e_tc_act_stats *item; - struct mlx5e_tc_act_stats key; u64 pkts, bytes, lastused; int err = 0; - key.tc_act_cookie = fl_act->cookie; - rcu_read_lock(); - item = rhashtable_lookup(ht, &key, act_counters_ht_params); + item = rhashtable_lookup(ht, &fl_act->cookie, act_counters_ht_params); if (!item) { rcu_read_unlock(); err = -ENOENT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 0290e0dea539..4e923a2874ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -112,10 +112,8 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *po int err; handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) { - kfree(handle); + if (!handle) return ERR_PTR(-ENOMEM); - } post_attr->chain = 0; post_attr->prio = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index a254e728ac95..fadfa8b50beb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -1545,7 +1545,8 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */ attr->ct_attr.zone = act->ct.zone; - attr->ct_attr.nf_ft = act->ct.flow_table; + if (!(act->ct.action & TCA_CT_ACT_CLEAR)) + attr->ct_attr.nf_ft = act->ct.flow_table; attr->ct_attr.act_miss_cookie = act->miss_cookie; return 0; @@ -1990,6 +1991,9 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *att if (!priv) return -EOPNOTSUPP; + if (attr->ct_attr.offloaded) + return 0; + if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) { err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts, 0, 0, 0, 0); @@ -1999,11 +2003,15 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *att attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } - if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ + if (!attr->ct_attr.nf_ft) { /* means only ct clear action, and not ct_clear,ct() */ + attr->ct_attr.offloaded = true; return 0; + } mutex_lock(&priv->control_lock); err = __mlx5_tc_ct_flow_offload(priv, attr); + if (!err) + attr->ct_attr.offloaded = true; mutex_unlock(&priv->control_lock); return err; @@ -2021,7 +2029,7 @@ void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr) { - if (!attr->ct_attr.ft) /* no ct action, return */ + if (!attr->ct_attr.offloaded) /* no ct action, return */ return; if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 8e9316fa46d4..b66c5f98067f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -29,6 +29,7 @@ struct mlx5_ct_attr { u32 ct_labels_id; u32 act_miss_mapping; u64 act_miss_cookie; + bool offloaded; struct mlx5_ct_ft *ft; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index ba2b1f24ff14..6cc23af66b5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -94,13 +94,13 @@ struct mlx5e_tc_flow { * destinations. */ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; - struct mlx5e_tc_flow *peer_flow; struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ struct list_head hairpin; /* flows sharing the same hairpin */ - struct list_head peer; /* flows with peer flow */ + struct list_head peer[MLX5_MAX_PORTS]; /* flows with peer flow */ struct list_head unready; /* flows not ready to be offloaded (e.g * due to missing route) */ + struct list_head peer_flows; /* flows on peer */ struct net_device *orig_dev; /* netdev adding flow first */ int tmp_entry_index; struct list_head tmp_list; /* temporary flow list used by neigh update */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index f0c3464f037f..1730f6a716ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1030,9 +1030,6 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, int out_index; int err = 0; - if (!mlx5e_is_eswitch_flow(flow)) - return 0; - parse_attr = attr->parse_attr; esw_attr = attr->esw_attr; *vf_tun = false; @@ -1464,10 +1461,12 @@ static void mlx5e_invalidate_encap(struct mlx5e_priv *priv, attr = mlx5e_tc_get_encap_attr(flow); esw_attr = attr->esw_attr; - if (flow_flag_test(flow, SLOW)) + if (flow_flag_test(flow, SLOW)) { mlx5e_tc_unoffload_from_slow_path(esw, flow); - else + } else { mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); + mlx5e_tc_unoffload_flow_post_acts(flow); + } mlx5e_tc_detach_mod_hdr(priv, flow, attr); attr->modify_hdr = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index f0e6095809fa..40589cebb773 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -662,8 +662,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) * as we know this is a page_pool page. */ - page_pool_put_defragged_page(page->pp, - page, -1, true); + page_pool_recycle_direct(page->pp, page); } while (++n < num); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 9e8e6184f9e4..ecfe93a479da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -84,6 +84,8 @@ enum mlx5e_xdp_xmit_mode { * MLX5E_XDP_XMIT_MODE_XSK: * none. */ +#define MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO 4 + union mlx5e_xdp_info { enum mlx5e_xdp_xmit_mode mode; union { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index d97e6df66f45..b8dd74453655 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -323,8 +323,11 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, net_prefetch(mxbuf->xdp.data); prog = rcu_dereference(rq->xdp_prog); - if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) + if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); return NULL; /* page/packet was consumed by XDP */ + } /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse * will be handled by mlx5e_free_rx_wqe. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index c964644ee866..bac4717548c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -125,7 +125,7 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, #ifdef CONFIG_MLX5_EN_TLS /* May send WQEs. */ - if (mlx5e_ktls_skb_offloaded(skb)) + if (tls_is_skb_tx_device_offloaded(skb)) if (unlikely(!mlx5e_ktls_handle_tx_skb(dev, sq, skb, &state->tls))) return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index 88a5aed9d678..c7d191f66ad1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -190,6 +190,7 @@ static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft, in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { kfree(ft->g); + ft->g = NULL; kvfree(in); return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index dbe87bf89c0d..832d36be4a17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -808,9 +808,9 @@ static void setup_fte_upper_proto_match(struct mlx5_flow_spec *spec, struct upsp } if (upspec->sport) { - MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_dport, + MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_sport, upspec->sport_mask); - MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_dport, upspec->sport); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_sport, upspec->sport); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index eab5bc718771..8d995e304869 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -58,7 +58,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) trailer_len = alen + plen + 2; - pskb_trim(skb, skb->len - trailer_len); + ret = pskb_trim(skb, skb->len - trailer_len); + if (unlikely(ret)) + return ret; if (skb->protocol == htons(ETH_P_IP)) { ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len); ip_send_check(ipv4hdr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index cf704f106b7c..984fa04bd331 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -188,7 +188,6 @@ static void mlx5e_tls_debugfs_init(struct mlx5e_tls *tls, int mlx5e_ktls_init(struct mlx5e_priv *priv) { - struct mlx5_crypto_dek_pool *dek_pool; struct mlx5e_tls *tls; if (!mlx5e_is_ktls_device(priv->mdev)) @@ -199,12 +198,6 @@ int mlx5e_ktls_init(struct mlx5e_priv *priv) return -ENOMEM; tls->mdev = priv->mdev; - dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY); - if (IS_ERR(dek_pool)) { - kfree(tls); - return PTR_ERR(dek_pool); - } - tls->dek_pool = dek_pool; priv->tls = tls; mlx5e_tls_debugfs_init(tls, priv->dfs_root); @@ -222,7 +215,6 @@ void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) debugfs_remove_recursive(tls->debugfs.dfs); tls->debugfs.dfs = NULL; - mlx5_crypto_dek_pool_destroy(tls->dek_pool); kfree(priv->tls); priv->tls = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 0e4c0a093293..d61be26a4df1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -846,7 +846,7 @@ bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, tls_ctx = tls_get_ctx(skb->sk); tls_netdev = rcu_dereference_bh(tls_ctx->netdev); /* Don't WARN on NULL: if tls_device_down is running in parallel, - * netdev might become NULL, even if tls_is_sk_tx_device_offloaded was + * netdev might become NULL, even if tls_is_skb_tx_device_offloaded was * true. Rather continue processing this packet. */ if (WARN_ON_ONCE(tls_netdev && tls_netdev != netdev)) @@ -908,28 +908,51 @@ static void mlx5e_tls_tx_debugfs_init(struct mlx5e_tls *tls, int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) { + struct mlx5_crypto_dek_pool *dek_pool; struct mlx5e_tls *tls = priv->tls; + int err; + + if (!mlx5e_is_ktls_device(priv->mdev)) + return 0; + + /* DEK pool could be used by either or both of TX and RX. But we have to + * put the creation here to avoid syndrome when doing devlink reload. + */ + dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY); + if (IS_ERR(dek_pool)) + return PTR_ERR(dek_pool); + tls->dek_pool = dek_pool; if (!mlx5e_is_ktls_tx(priv->mdev)) return 0; priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats); - if (!priv->tls->tx_pool) - return -ENOMEM; + if (!priv->tls->tx_pool) { + err = -ENOMEM; + goto err_tx_pool_init; + } mlx5e_tls_tx_debugfs_init(tls, tls->debugfs.dfs); return 0; + +err_tx_pool_init: + mlx5_crypto_dek_pool_destroy(dek_pool); + return err; } void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) { if (!mlx5e_is_ktls_tx(priv->mdev)) - return; + goto dek_pool_destroy; debugfs_remove_recursive(priv->tls->debugfs.dfs_tx); priv->tls->debugfs.dfs_tx = NULL; mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool); priv->tls->tx_pool = NULL; + +dek_pool_destroy: + if (mlx5e_is_ktls_device(priv->mdev)) + mlx5_crypto_dek_pool_destroy(priv->tls->dek_pool); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index 2dd78dd4ad65..f87b65c560ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -49,11 +49,6 @@ mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state); } -static inline bool mlx5e_ktls_skb_offloaded(struct sk_buff *skb) -{ - return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk); -} - static inline void mlx5e_ktls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg, struct mlx5e_accel_tx_tls_state *state) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 6b7b563f844a..592b165530ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -349,15 +349,6 @@ static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, sa->macsec_rule = NULL; } -static struct mlx5e_priv *macsec_netdev_priv(const struct net_device *dev) -{ -#if IS_ENABLED(CONFIG_VLAN_8021Q) - if (is_vlan_dev(dev)) - return netdev_priv(vlan_dev_priv(dev)->real_dev); -#endif - return netdev_priv(dev); -} - static int mlx5e_macsec_init_sa(struct macsec_context *ctx, struct mlx5e_macsec_sa *sa, bool encrypt, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c index 7fc901a6ec5f..414e28584881 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c @@ -161,6 +161,7 @@ static int macsec_fs_tx_create_crypto_table_groups(struct mlx5e_flow_table *ft) if (!in) { kfree(ft->g); + ft->g = NULL; return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index bed0c2d043e7..5aa51d74f8b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -135,6 +135,16 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs); int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs) { + /* Moving to switchdev mode, fs->arfs is freed by mlx5e_nic_profile + * cleanup_rx callback and it is not recreated when + * mlx5e_uplink_rep_profile is loaded as mlx5e_create_flow_steering() + * is not called by the uplink_rep profile init_rx callback. Thus, if + * ntuple is set, moving to switchdev flow will enter this function + * with fs->arfs nullified. + */ + if (!mlx5e_fs_get_arfs(fs)) + return 0; + arfs_del_rules(fs); return arfs_disable(fs); @@ -570,10 +580,10 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, if (IS_ERR(rule)) { err = PTR_ERR(rule); priv->channel_stats[arfs_rule->rxq]->rq.arfs_err++; - mlx5e_dbg(HW, priv, - "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n", - __func__, arfs_rule->filter_id, arfs_rule->rxq, - tuple->ip_proto, err); + netdev_dbg(priv->netdev, + "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n", + __func__, arfs_rule->filter_id, arfs_rule->rxq, + tuple->ip_proto, err); } out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index ebee52a8361a..8705cffc747f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -275,10 +275,10 @@ static int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa)); for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - mlx5e_dbg(HW, priv, "%s: prio_%d <=> tc_%d\n", - __func__, i, ets->prio_tc[i]); - mlx5e_dbg(HW, priv, "%s: tc_%d <=> tx_bw_%d%%, group_%d\n", - __func__, i, tc_tx_bw[i], tc_group[i]); + netdev_dbg(priv->netdev, "%s: prio_%d <=> tc_%d\n", + __func__, i, ets->prio_tc[i]); + netdev_dbg(priv->netdev, "%s: tc_%d <=> tx_bw_%d%%, group_%d\n", + __func__, i, tc_tx_bw[i], tc_group[i]); } return err; @@ -399,9 +399,9 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, } if (!ret) { - mlx5e_dbg(HW, priv, - "%s: PFC per priority bit mask: 0x%x\n", - __func__, pfc->pfc_en); + netdev_dbg(dev, + "%s: PFC per priority bit mask: 0x%x\n", + __func__, pfc->pfc_en); } return ret; } @@ -611,8 +611,8 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - mlx5e_dbg(HW, priv, "%s: tc_%d <=> max_bw %d Gbps\n", - __func__, i, max_bw_value[i]); + netdev_dbg(netdev, "%s: tc_%d <=> max_bw %d Gbps\n", + __func__, i, max_bw_value[i]); } return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); @@ -640,10 +640,10 @@ static u8 mlx5e_dcbnl_setall(struct net_device *netdev) ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i]; ets.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; ets.prio_tc[i] = cee_cfg->prio_to_pg_map[i]; - mlx5e_dbg(HW, priv, - "%s: Priority group %d: tx_bw %d, rx_bw %d, prio_tc %d\n", - __func__, i, ets.tc_tx_bw[i], ets.tc_rx_bw[i], - ets.prio_tc[i]); + netdev_dbg(netdev, + "%s: Priority group %d: tx_bw %d, rx_bw %d, prio_tc %d\n", + __func__, i, ets.tc_tx_bw[i], ets.tc_rx_bw[i], + ets.prio_tc[i]); } err = mlx5e_dbcnl_validate_ets(netdev, &ets, true); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 1f5a2110d31f..27861b68ced5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1689,16 +1689,6 @@ static int mlx5e_set_fecparam(struct net_device *netdev, return 0; } -static u32 mlx5e_get_msglevel(struct net_device *dev) -{ - return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel; -} - -static void mlx5e_set_msglevel(struct net_device *dev, u32 val) -{ - ((struct mlx5e_priv *)netdev_priv(dev))->msglevel = val; -} - static int mlx5e_set_phys_id(struct net_device *dev, enum ethtool_phys_id_state state) { @@ -1952,9 +1942,9 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val if (err) return err; - mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n", - MLX5E_GET_PFLAG(&priv->channels.params, - MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF"); + netdev_dbg(priv->netdev, "MLX5E: RxCqeCmprss was turned %s\n", + MLX5E_GET_PFLAG(&priv->channels.params, + MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF"); return 0; } @@ -2444,8 +2434,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_priv_flags = mlx5e_get_priv_flags, .set_priv_flags = mlx5e_set_priv_flags, .self_test = mlx5e_self_test, - .get_msglevel = mlx5e_get_msglevel, - .set_msglevel = mlx5e_set_msglevel, .get_fec_stats = mlx5e_get_fec_stats, .get_fecparam = mlx5e_get_fecparam, .set_fecparam = mlx5e_set_fecparam, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 33bfe4d7338b..934b0d5ce1b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -283,7 +283,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs, if (IS_ERR(*rule_p)) { err = PTR_ERR(*rule_p); *rule_p = NULL; - fs_err(fs, "%s: add rule failed\n", __func__); + fs_err(fs, "add rule failed\n"); } return err; @@ -395,8 +395,7 @@ int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num if (IS_ERR(rule)) { err = PTR_ERR(rule); fs->vlan->trap_rule = NULL; - fs_err(fs, "%s: add VLAN trap rule failed, err %d\n", - __func__, err); + fs_err(fs, "add VLAN trap rule failed, err %d\n", err); return err; } fs->vlan->trap_rule = rule; @@ -421,8 +420,7 @@ int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num) if (IS_ERR(rule)) { err = PTR_ERR(rule); fs->l2.trap_rule = NULL; - fs_err(fs, "%s: add MAC trap rule failed, err %d\n", - __func__, err); + fs_err(fs, "add MAC trap rule failed, err %d\n", err); return err; } fs->l2.trap_rule = rule; @@ -763,7 +761,7 @@ static int mlx5e_add_promisc_rule(struct mlx5e_flow_steering *fs) if (IS_ERR(*rule_p)) { err = PTR_ERR(*rule_p); *rule_p = NULL; - fs_err(fs, "%s: add promiscuous rule failed\n", __func__); + fs_err(fs, "add promiscuous rule failed\n"); } kvfree(spec); return err; @@ -995,7 +993,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs, ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(ai->rule)) { - fs_err(fs, "%s: add l2 rule(mac:%pM) failed\n", __func__, mv_dmac); + fs_err(fs, "add l2 rule(mac:%pM) failed\n", mv_dmac); err = PTR_ERR(ai->rule); ai->rule = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a5bdf78955d7..f7b494125eee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1036,7 +1036,23 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_s return err; } -static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) +static void mlx5e_flush_rq_cq(struct mlx5e_rq *rq) +{ + struct mlx5_cqwq *cqwq = &rq->cq.wq; + struct mlx5_cqe64 *cqe; + + if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) { + while ((cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq))) + mlx5_cqwq_pop(cqwq); + } else { + while ((cqe = mlx5_cqwq_get_cqe(cqwq))) + mlx5_cqwq_pop(cqwq); + } + + mlx5_cqwq_update_db_record(cqwq); +} + +int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) { struct net_device *dev = rq->netdev; int err; @@ -1046,6 +1062,10 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); return err; } + + mlx5e_free_rx_descs(rq); + mlx5e_flush_rq_cq(rq); + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) { netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); @@ -1055,13 +1075,6 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) return 0; } -int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) -{ - mlx5e_free_rx_descs(rq); - - return mlx5e_rq_to_ready(rq, curr_state); -} - static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) { struct mlx5_core_dev *mdev = rq->mdev; @@ -1285,11 +1298,13 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa) { struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - int entries = wq_sz * MLX5_SEND_WQEBB_NUM_DS * 2; /* upper bound for maximum num of - * entries of all xmit_modes. - */ + int entries; size_t size; + /* upper bound for maximum num of entries of all xmit_modes. */ + entries = roundup_pow_of_two(wq_sz * MLX5_SEND_WQEBB_NUM_DS * + MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO); + size = array_size(sizeof(*xdpi_fifo->xi), entries); xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa); if (!xdpi_fifo->xi) @@ -2402,7 +2417,7 @@ static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu) /* Asymmetric dynamic memory allocation. * Freed in mlx5e_priv_arrays_free, not on channel closure. */ - mlx5e_dbg(DRV, priv, "Creating channel stats %d\n", ix); + netdev_dbg(priv->netdev, "Creating channel stats %d\n", ix); priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats), GFP_KERNEL, cpu_to_node(cpu)); if (!priv->channel_stats[ix]) @@ -2780,7 +2795,7 @@ int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) num_txqs += ntc; - mlx5e_dbg(DRV, priv, "Setting num_txqs %d\n", num_txqs); + netdev_dbg(priv->netdev, "Setting num_txqs %d\n", num_txqs); err = netif_set_real_num_tx_queues(priv->netdev, num_txqs); if (err) netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err); @@ -5253,6 +5268,7 @@ void mlx5e_destroy_q_counters(struct mlx5e_priv *priv) static int mlx5e_nic_init(struct mlx5_core_dev *mdev, struct net_device *netdev) { + const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_flow_steering *fs; int err; @@ -5281,9 +5297,19 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); mlx5e_health_create_reporters(priv); + + /* If netdev is already registered (e.g. move from uplink to nic profile), + * RTNL lock must be held before triggering netdev notifiers. + */ + if (take_rtnl) + rtnl_lock(); + /* update XDP supported features */ mlx5e_set_xdp_feature(netdev); + if (take_rtnl) + rtnl_unlock(); + return 0; } @@ -5586,7 +5612,6 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, /* priv init */ priv->mdev = mdev; priv->netdev = netdev; - priv->msglevel = MLX5E_MSG_LEVEL; priv->max_nch = nch; priv->max_opened_tc = 1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3e7041bd5705..99b3843396f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -374,7 +374,9 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { struct mlx5e_rep_sq *rep_sq, *tmp; + struct mlx5e_rep_sq_peer *sq_peer; struct mlx5e_rep_priv *rpriv; + unsigned long i; if (esw->mode != MLX5_ESWITCH_OFFLOADS) return; @@ -382,31 +384,78 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, rpriv = mlx5e_rep_to_rep_priv(rep); list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) { mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); - if (rep_sq->send_to_vport_rule_peer) - mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer); + xa_for_each(&rep_sq->sq_peer, i, sq_peer) { + if (sq_peer->rule) + mlx5_eswitch_del_send_to_vport_rule(sq_peer->rule); + + xa_erase(&rep_sq->sq_peer, i); + kfree(sq_peer); + } + + xa_destroy(&rep_sq->sq_peer); list_del(&rep_sq->list); kfree(rep_sq); } } +static int mlx5e_sqs2vport_add_peers_rules(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, + struct mlx5_devcom *devcom, + struct mlx5e_rep_sq *rep_sq, int i) +{ + struct mlx5_eswitch *peer_esw = NULL; + struct mlx5_flow_handle *flow_rule; + int tmp; + + mlx5_devcom_for_each_peer_entry(devcom, MLX5_DEVCOM_ESW_OFFLOADS, + peer_esw, tmp) { + u16 peer_rule_idx = MLX5_CAP_GEN(peer_esw->dev, vhca_id); + struct mlx5e_rep_sq_peer *sq_peer; + int err; + + sq_peer = kzalloc(sizeof(*sq_peer), GFP_KERNEL); + if (!sq_peer) + return -ENOMEM; + + flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, + rep, rep_sq->sqn); + if (IS_ERR(flow_rule)) { + kfree(sq_peer); + return PTR_ERR(flow_rule); + } + + sq_peer->rule = flow_rule; + sq_peer->peer = peer_esw; + err = xa_insert(&rep_sq->sq_peer, peer_rule_idx, sq_peer, GFP_KERNEL); + if (err) { + kfree(sq_peer); + mlx5_eswitch_del_send_to_vport_rule(flow_rule); + return err; + } + } + + return 0; +} + static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u32 *sqns_array, int sqns_num) { - struct mlx5_eswitch *peer_esw = NULL; struct mlx5_flow_handle *flow_rule; struct mlx5e_rep_priv *rpriv; struct mlx5e_rep_sq *rep_sq; + struct mlx5_devcom *devcom; + bool devcom_locked = false; int err; int i; if (esw->mode != MLX5_ESWITCH_OFFLOADS) return 0; + devcom = esw->dev->priv.devcom; rpriv = mlx5e_rep_to_rep_priv(rep); - if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS)) - peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom, - MLX5_DEVCOM_ESW_OFFLOADS); + if (mlx5_devcom_comp_is_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS) && + mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + devcom_locked = true; for (i = 0; i < sqns_num; i++) { rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL); @@ -426,31 +475,30 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, rep_sq->send_to_vport_rule = flow_rule; rep_sq->sqn = sqns_array[i]; - if (peer_esw) { - flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, - rep, sqns_array[i]); - if (IS_ERR(flow_rule)) { - err = PTR_ERR(flow_rule); + xa_init(&rep_sq->sq_peer); + if (devcom_locked) { + err = mlx5e_sqs2vport_add_peers_rules(esw, rep, devcom, rep_sq, i); + if (err) { mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); + xa_destroy(&rep_sq->sq_peer); kfree(rep_sq); goto out_err; } - rep_sq->send_to_vport_rule_peer = flow_rule; } list_add(&rep_sq->list, &rpriv->vport_sqs_list); } - if (peer_esw) - mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (devcom_locked) + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); return 0; out_err: mlx5e_sqs2vport_stop(esw, rep); - if (peer_esw) - mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (devcom_locked) + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); return err; } @@ -964,7 +1012,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) err = mlx5e_open_drop_rq(priv, &priv->drop_rq); if (err) { mlx5_core_err(mdev, "open drop rq failed, %d\n", err); - return err; + goto err_rx_res_free; } err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, @@ -998,6 +1046,7 @@ err_destroy_rx_res: mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); +err_rx_res_free: mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; err_free_fs: @@ -1111,6 +1160,10 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) return err; } + err = mlx5e_rep_neigh_init(rpriv); + if (err) + goto err_neigh_init; + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { err = mlx5e_init_uplink_rep_tx(rpriv); if (err) @@ -1127,6 +1180,8 @@ err_ht_init: if (rpriv->rep->vport == MLX5_VPORT_UPLINK) mlx5e_cleanup_uplink_rep_tx(rpriv); err_init_tx: + mlx5e_rep_neigh_cleanup(rpriv); +err_neigh_init: mlx5e_destroy_tises(priv); return err; } @@ -1140,22 +1195,17 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) if (rpriv->rep->vport == MLX5_VPORT_UPLINK) mlx5e_cleanup_uplink_rep_tx(rpriv); + mlx5e_rep_neigh_cleanup(rpriv); mlx5e_destroy_tises(priv); } static void mlx5e_rep_enable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - mlx5e_set_netdev_mtu_boundaries(priv); - mlx5e_rep_neigh_init(rpriv); } static void mlx5e_rep_disable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - - mlx5e_rep_neigh_cleanup(rpriv); } static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) @@ -1205,7 +1255,6 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; u16 max_mtu; @@ -1227,7 +1276,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) mlx5_notifier_register(mdev, &priv->events_nb); mlx5e_dcbnl_initialize(priv); mlx5e_dcbnl_init_app(priv); - mlx5e_rep_neigh_init(rpriv); mlx5e_rep_bridge_init(priv); netdev->wanted_features |= NETIF_F_HW_TC; @@ -1242,7 +1290,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_core_dev *mdev = priv->mdev; rtnl_lock(); @@ -1252,7 +1299,6 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) rtnl_unlock(); mlx5e_rep_bridge_cleanup(priv); - mlx5e_rep_neigh_cleanup(rpriv); mlx5e_dcbnl_delete_app(priv); mlx5_notifier_unregister(mdev, &priv->events_nb); mlx5e_rep_tc_disable(priv); @@ -1530,17 +1576,24 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } -static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep) +static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep, + struct mlx5_eswitch *peer_esw) { + u16 i = MLX5_CAP_GEN(peer_esw->dev, vhca_id); struct mlx5e_rep_priv *rpriv; struct mlx5e_rep_sq *rep_sq; + WARN_ON_ONCE(!peer_esw); rpriv = mlx5e_rep_to_rep_priv(rep); list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) { - if (!rep_sq->send_to_vport_rule_peer) + struct mlx5e_rep_sq_peer *sq_peer = xa_load(&rep_sq->sq_peer, i); + + if (!sq_peer || sq_peer->peer != peer_esw) continue; - mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer); - rep_sq->send_to_vport_rule_peer = NULL; + + mlx5_eswitch_del_send_to_vport_rule(sq_peer->rule); + xa_erase(&rep_sq->sq_peer, i); + kfree(sq_peer); } } @@ -1548,24 +1601,52 @@ static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, struct mlx5_eswitch *peer_esw) { + u16 i = MLX5_CAP_GEN(peer_esw->dev, vhca_id); struct mlx5_flow_handle *flow_rule; + struct mlx5e_rep_sq_peer *sq_peer; struct mlx5e_rep_priv *rpriv; struct mlx5e_rep_sq *rep_sq; + int err; rpriv = mlx5e_rep_to_rep_priv(rep); list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) { - if (rep_sq->send_to_vport_rule_peer) + sq_peer = xa_load(&rep_sq->sq_peer, i); + + if (sq_peer && sq_peer->peer) continue; - flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn); - if (IS_ERR(flow_rule)) + + flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, + rep_sq->sqn); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); goto err_out; - rep_sq->send_to_vport_rule_peer = flow_rule; + } + + if (sq_peer) { + sq_peer->rule = flow_rule; + sq_peer->peer = peer_esw; + continue; + } + sq_peer = kzalloc(sizeof(*sq_peer), GFP_KERNEL); + if (!sq_peer) { + err = -ENOMEM; + goto err_sq_alloc; + } + err = xa_insert(&rep_sq->sq_peer, i, sq_peer, GFP_KERNEL); + if (err) + goto err_xa; + sq_peer->rule = flow_rule; + sq_peer->peer = peer_esw; } return 0; +err_xa: + kfree(sq_peer); +err_sq_alloc: + mlx5_eswitch_del_send_to_vport_rule(flow_rule); err_out: - mlx5e_vport_rep_event_unpair(rep); - return PTR_ERR(flow_rule); + mlx5e_vport_rep_event_unpair(rep, peer_esw); + return err; } static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw, @@ -1578,7 +1659,7 @@ static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw, if (event == MLX5_SWITCHDEV_EVENT_PAIR) err = mlx5e_vport_rep_event_pair(esw, rep, data); else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR) - mlx5e_vport_rep_event_unpair(rep); + mlx5e_vport_rep_event_unpair(rep, data); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 80b7f5079a5a..70640fa1ad7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -225,9 +225,14 @@ struct mlx5e_encap_entry { struct rcu_head rcu; }; +struct mlx5e_rep_sq_peer { + struct mlx5_flow_handle *rule; + void *peer; +}; + struct mlx5e_rep_sq { struct mlx5_flow_handle *send_to_vport_rule; - struct mlx5_flow_handle *send_to_vport_rule_peer; + struct xarray sq_peer; u32 sqn; struct list_head list; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 69634829558e..41d37159e027 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -390,10 +390,18 @@ static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix); - if (rq->xsk_pool) + if (rq->xsk_pool) { mlx5e_xsk_free_rx_wqe(wi); - else + } else { mlx5e_free_rx_wqe(rq, wi); + + /* Avoid a second release of the wqe pages: dealloc is called + * for the same missing wqes on regular RQ flush and on regular + * RQ close. This happens when XSK RQs come into play. + */ + for (int i = 0; i < rq->wqe.info.num_frags; i++, wi++) + wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } } static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) @@ -491,9 +499,7 @@ mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinf } frag = &sinfo->frags[sinfo->nr_frags++]; - __skb_frag_set_page(frag, frag_page->page); - skb_frag_off_set(frag, frag_offset); - skb_frag_size_set(frag, len); + skb_frag_fill_page_desc(frag, frag_page->page, frag_offset, len); if (page_is_pfmemalloc(frag_page->page)) xdp_buff_set_frag_pfmemalloc(xdp); @@ -1745,11 +1751,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi prog = rcu_dereference(rq->xdp_prog); if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) { - if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { struct mlx5e_wqe_frag_info *pwi; for (pwi = head_wi; pwi < wi; pwi++) - pwi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + pwi->frag_page->frags++; } return NULL; /* page/packet was consumed by XDP */ } @@ -1819,12 +1825,8 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) rq, wi, cqe, cqe_bcnt); if (!skb) { /* probably for XDP */ - if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { - /* do not return page to cache, - * it will be returned on XDP_TX completion. - */ - wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); - } + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + wi->frag_page->frags++; goto wq_cyc_pop; } @@ -1870,12 +1872,8 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) rq, wi, cqe, cqe_bcnt); if (!skb) { /* probably for XDP */ - if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { - /* do not return page to cache, - * it will be returned on XDP_TX completion. - */ - wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); - } + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + wi->frag_page->frags++; goto wq_cyc_pop; } @@ -2054,12 +2052,12 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w if (prog) { if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { - int i; + struct mlx5e_frag_page *pfp; + + for (pfp = head_page; pfp < frag_page; pfp++) + pfp->frags++; - for (i = 0; i < sinfo->nr_frags; i++) - /* non-atomic */ - __set_bit(page_idx + i, wi->skip_release_bitmap); - return NULL; + wi->linear_page.frags++; } mlx5e_page_release_fragmented(rq, &wi->linear_page); return NULL; /* page/packet was consumed by XDP */ @@ -2157,7 +2155,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, cqe_bcnt, &mxbuf); if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) - __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */ + frag_page->frags++; return NULL; /* page/packet was consumed by XDP */ } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index f1d9596905c6..4d77055abd4b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -30,7 +30,7 @@ * SOFTWARE. */ -#include "lib/mlx5.h" +#include "lib/events.h" #include "en.h" #include "en_accel/ktls.h" #include "en_accel/en_accel.h" @@ -748,11 +748,22 @@ static const struct counter_desc vport_stats_desc[] = { VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) }, }; +static const struct counter_desc vport_loopback_stats_desc[] = { + { "vport_loopback_packets", + VPORT_COUNTER_OFF(local_loopback.packets) }, + { "vport_loopback_bytes", + VPORT_COUNTER_OFF(local_loopback.octets) }, +}; + #define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) +#define NUM_VPORT_LOOPBACK_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, vport_counter_local_loopback) ? \ + ARRAY_SIZE(vport_loopback_stats_desc) : 0) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport) { - return NUM_VPORT_COUNTERS; + return NUM_VPORT_COUNTERS + + NUM_VPORT_LOOPBACK_COUNTERS(priv->mdev); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport) @@ -761,6 +772,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport) for (i = 0; i < NUM_VPORT_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_stats_desc[i].format); + + for (i = 0; i < NUM_VPORT_LOOPBACK_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vport_loopback_stats_desc[i].format); + return idx; } @@ -771,6 +787,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport) for (i = 0; i < NUM_VPORT_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, vport_stats_desc, i); + + for (i = 0; i < NUM_VPORT_LOOPBACK_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_loopback_stats_desc, i); + return idx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b9b1da751a3b..4b22a91482ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1639,7 +1639,8 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow) uplink_priv = &rpriv->uplink_priv; mutex_lock(&uplink_priv->unready_flows_lock); - unready_flow_del(flow); + if (flow_flag_test(flow, NOT_READY)) + unready_flow_del(flow); mutex_unlock(&uplink_priv->unready_flows_lock); } @@ -1667,8 +1668,11 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro { struct mlx5e_priv *out_priv, *route_priv; struct mlx5_core_dev *route_mdev; + struct mlx5_devcom *devcom; struct mlx5_eswitch *esw; u16 vhca_id; + int err; + int i; out_priv = netdev_priv(out_dev); esw = out_priv->mdev->priv.eswitch; @@ -1676,28 +1680,25 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro route_mdev = route_priv->mdev; vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id); - if (mlx5_lag_is_active(out_priv->mdev)) { - struct mlx5_devcom *devcom; - int err; - - /* In lag case we may get devices from different eswitch instances. - * If we failed to get vport num, it means, mostly, that we on the wrong - * eswitch. - */ - err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); - if (err != -ENOENT) - return err; - - rcu_read_lock(); - devcom = out_priv->mdev->priv.devcom; - esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV; - rcu_read_unlock(); + err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); + if (!err) + return err; + if (!mlx5_lag_is_active(out_priv->mdev)) return err; + + rcu_read_lock(); + devcom = out_priv->mdev->priv.devcom; + err = -ENODEV; + mlx5_devcom_for_each_peer_entry_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS, + esw, i) { + err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); + if (!err) + break; } + rcu_read_unlock(); - return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); + return err; } static int @@ -1724,6 +1725,19 @@ verify_attr_actions(u32 actions, struct netlink_ext_ack *extack) return 0; } +static bool +has_encap_dests(struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int out_index; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) + return true; + + return false; +} + static int post_process_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, @@ -1736,9 +1750,11 @@ post_process_attr(struct mlx5e_tc_flow *flow, if (err) goto err_out; - err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun); - if (err) - goto err_out; + if (mlx5e_is_eswitch_flow(flow) && has_encap_dests(attr)) { + err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun); + if (err) + goto err_out; + } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr); @@ -1927,13 +1943,10 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_flow_attr *attr = flow->attr; - struct mlx5_esw_flow_attr *esw_attr; - esw_attr = attr->esw_attr; mlx5e_put_flow_tunnel_id(flow); - if (flow_flag_test(flow, NOT_READY)) - remove_unready_flow(flow); + remove_unready_flow(flow); if (mlx5e_is_offloaded_flow(flow)) { if (flow_flag_test(flow, SLOW)) @@ -1951,12 +1964,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); - if (esw_attr->int_port) - mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port); - - if (esw_attr->dest_int_port) - mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port); - if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); @@ -1987,47 +1994,59 @@ void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) mlx5e_flow_put(priv, flow); } -static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) +static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow, + int peer_index) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *peer_flow; + struct mlx5e_tc_flow *tmp; if (!flow_flag_test(flow, ESWITCH) || !flow_flag_test(flow, DUP)) return; mutex_lock(&esw->offloads.peer_mutex); - list_del(&flow->peer); + list_del(&flow->peer[peer_index]); mutex_unlock(&esw->offloads.peer_mutex); - flow_flag_clear(flow, DUP); - - if (refcount_dec_and_test(&flow->peer_flow->refcnt)) { - mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); - kfree(flow->peer_flow); + list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) { + if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev)) + continue; + if (refcount_dec_and_test(&peer_flow->refcnt)) { + mlx5e_tc_del_fdb_flow(peer_flow->priv, peer_flow); + list_del(&peer_flow->peer_flows); + kfree(peer_flow); + } } - flow->peer_flow = NULL; + if (list_empty(&flow->peer_flows)) + flow_flag_clear(flow, DUP); } -static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) +static void mlx5e_tc_del_fdb_peers_flow(struct mlx5e_tc_flow *flow) { - struct mlx5_core_dev *dev = flow->priv->mdev; - struct mlx5_devcom *devcom = dev->priv.devcom; - struct mlx5_eswitch *peer_esw; - - peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - if (!peer_esw) - return; + int i; - __mlx5e_tc_del_fdb_peer_flow(flow); - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (i == mlx5_get_dev_index(flow->priv->mdev)) + continue; + mlx5e_tc_del_fdb_peer_flow(flow, i); + } } static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { if (mlx5e_is_eswitch_flow(flow)) { - mlx5e_tc_del_fdb_peer_flow(flow); + struct mlx5_devcom *devcom = flow->priv->mdev->priv.devcom; + + if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) { + mlx5e_tc_del_fdb_flow(priv, flow); + return; + } + + mlx5e_tc_del_fdb_peers_flow(flow); + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); mlx5e_tc_del_fdb_flow(priv, flow); } else { mlx5e_tc_del_nic_flow(priv, flow); @@ -2503,6 +2522,12 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev, return 0; flow_rule_match_meta(rule, &match); + + if (match.mask->l2_miss) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\""); + return -EOPNOTSUPP; + } + if (!match.mask->ingress_ifindex) return 0; @@ -3914,6 +3939,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, } i_split = i + 1; + parse_state->if_count = 0; list_add(&attr->list, &flow->attrs); } @@ -4198,8 +4224,8 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) flow_flag_test(flow, INGRESS); bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); - bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom, - MLX5_DEVCOM_ESW_OFFLOADS); + bool esw_paired = mlx5_devcom_comp_is_ready(esw_attr->in_mdev->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS); if (!esw_paired) return false; @@ -4235,6 +4261,7 @@ static void mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) { struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow); + struct mlx5_esw_flow_attr *esw_attr; if (!attr) return; @@ -4252,6 +4279,18 @@ mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *a mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr); } + if (mlx5e_is_eswitch_flow(flow)) { + esw_attr = attr->esw_attr; + + if (esw_attr->int_port) + mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv), + esw_attr->int_port); + + if (esw_attr->dest_int_port) + mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv), + esw_attr->dest_int_port); + } + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); free_branch_attr(flow, attr->branch_true); @@ -4290,6 +4329,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, INIT_LIST_HEAD(&flow->hairpin); INIT_LIST_HEAD(&flow->l3_to_l2_reformat); INIT_LIST_HEAD(&flow->attrs); + INIT_LIST_HEAD(&flow->peer_flows); refcount_set(&flow->refcnt, 1); init_completion(&flow->init_done); init_completion(&flow->del_hw_done); @@ -4398,22 +4438,19 @@ out: static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, struct mlx5e_tc_flow *flow, - unsigned long flow_flags) + unsigned long flow_flags, + struct mlx5_eswitch *peer_esw) { struct mlx5e_priv *priv = flow->priv, *peer_priv; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; - struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct mlx5e_tc_flow_parse_attr *parse_attr; + int i = mlx5_get_dev_index(peer_esw->dev); struct mlx5e_rep_priv *peer_urpriv; struct mlx5e_tc_flow *peer_flow; struct mlx5_core_dev *in_mdev; int err = 0; - peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - if (!peer_esw) - return -ENODEV; - peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH); peer_priv = netdev_priv(peer_urpriv->netdev); @@ -4438,14 +4475,13 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, goto out; } - flow->peer_flow = peer_flow; + list_add_tail(&peer_flow->peer_flows, &flow->peer_flows); flow_flag_set(flow, DUP); mutex_lock(&esw->offloads.peer_mutex); - list_add_tail(&flow->peer, &esw->offloads.peer_flows); + list_add_tail(&flow->peer[i], &esw->offloads.peer_flows[i]); mutex_unlock(&esw->offloads.peer_mutex); out: - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); return err; } @@ -4456,30 +4492,48 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { + struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *in_rep = rpriv->rep; struct mlx5_core_dev *in_mdev = priv->mdev; + struct mlx5_eswitch *peer_esw; struct mlx5e_tc_flow *flow; int err; + int i; flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, in_mdev); if (IS_ERR(flow)) return PTR_ERR(flow); - if (is_peer_flow_needed(flow)) { - err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags); - if (err) { - mlx5e_tc_del_fdb_flow(priv, flow); - goto out; - } + if (!is_peer_flow_needed(flow)) { + *__flow = flow; + return 0; } - *__flow = flow; + if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) { + err = -ENODEV; + goto clean_flow; + } + mlx5_devcom_for_each_peer_entry(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + peer_esw, i) { + err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw); + if (err) + goto peer_clean; + } + + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + + *__flow = flow; return 0; -out: +peer_clean: + mlx5e_tc_del_fdb_peers_flow(flow); + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +clean_flow: + mlx5e_tc_del_fdb_flow(priv, flow); return err; } @@ -4697,7 +4751,6 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, { struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct rhashtable *tc_ht = get_tc_ht(priv, flags); - struct mlx5_eswitch *peer_esw; struct mlx5e_tc_flow *flow; struct mlx5_fc *counter; u64 lastuse = 0; @@ -4732,23 +4785,29 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, /* Under multipath it's possible for one rule to be currently * un-offloaded while the other rule is offloaded. */ - peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - if (!peer_esw) + if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) goto out; - if (flow_flag_test(flow, DUP) && - flow_flag_test(flow->peer_flow, OFFLOADED)) { - u64 bytes2; - u64 packets2; - u64 lastuse2; + if (flow_flag_test(flow, DUP)) { + struct mlx5e_tc_flow *peer_flow; - if (flow_flag_test(flow, USE_ACT_STATS)) { - f->use_act_stats = true; - } else { - counter = mlx5e_tc_get_counter(flow->peer_flow); + list_for_each_entry(peer_flow, &flow->peer_flows, peer_flows) { + u64 packets2; + u64 lastuse2; + u64 bytes2; + + if (!flow_flag_test(peer_flow, OFFLOADED)) + continue; + if (flow_flag_test(flow, USE_ACT_STATS)) { + f->use_act_stats = true; + break; + } + + counter = mlx5e_tc_get_counter(peer_flow); if (!counter) goto no_peer_counter; - mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2); + mlx5_fc_query_cached(counter, &bytes2, &packets2, + &lastuse2); bytes += bytes2; packets += packets2; @@ -4757,7 +4816,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, } no_peer_counter: - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); out: flow_stats_update(&f->stats, bytes, packets, 0, lastuse, FLOW_ACTION_HW_STATS_DELAYED); @@ -5275,9 +5334,14 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) { struct mlx5e_tc_flow *flow, *tmp; + int i; - list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer) - __mlx5e_tc_del_fdb_peer_flow(flow); + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (i == mlx5_get_dev_index(esw->dev)) + continue; + list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows[i], peer[i]) + mlx5e_tc_del_fdb_peers_flow(flow); + } } void mlx5e_tc_reoffload_flows_work(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index fbb2d963fb7e..a7d9b7cb4297 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -207,7 +207,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) } ch_stats->aff_change++; aff_change = true; - if (budget && work_done == budget) + if (work_done == budget) work_done--; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c index 2e504c7461c6..24b1ca4e4ff8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -15,13 +15,27 @@ static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport) vport->egress.offloads.fwd_rule = NULL; } -static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport) +void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport, int rule_index) { - if (!vport->egress.offloads.bounce_rule) + struct mlx5_flow_handle *bounce_rule = + xa_load(&vport->egress.offloads.bounce_rules, rule_index); + + if (!bounce_rule) return; - mlx5_del_flow_rules(vport->egress.offloads.bounce_rule); - vport->egress.offloads.bounce_rule = NULL; + mlx5_del_flow_rules(bounce_rule); + xa_erase(&vport->egress.offloads.bounce_rules, rule_index); +} + +static void esw_acl_egress_ofld_bounce_rules_destroy(struct mlx5_vport *vport) +{ + struct mlx5_flow_handle *bounce_rule; + unsigned long i; + + xa_for_each(&vport->egress.offloads.bounce_rules, i, bounce_rule) { + mlx5_del_flow_rules(bounce_rule); + xa_erase(&vport->egress.offloads.bounce_rules, i); + } } static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw, @@ -96,7 +110,7 @@ static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport) { esw_acl_egress_vlan_destroy(vport); esw_acl_egress_ofld_fwd2vport_destroy(vport); - esw_acl_egress_ofld_bounce_rule_destroy(vport); + esw_acl_egress_ofld_bounce_rules_destroy(vport); } static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw, @@ -194,6 +208,7 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport vport->egress.acl = NULL; return err; } + vport->egress.type = VPORT_EGRESS_ACL_TYPE_DEFAULT; err = esw_acl_egress_ofld_groups_create(esw, vport); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c index 45b839116212..d599e50af346 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c @@ -35,7 +35,8 @@ esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, } ft_attr.max_fte = size; - ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT; + if (vport_num || mlx5_core_is_ecpf(esw->dev)) + ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT; acl = mlx5_create_vport_flow_table(root_ns, &ft_attr, vport_num); if (IS_ERR(acl)) { err = PTR_ERR(acl); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h index c9f8469e9a47..536b04e83618 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h @@ -10,6 +10,7 @@ /* Eswitch acl egress external APIs */ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport); +void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport, int rule_index); int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num, u16 passive_vport_num); int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index 1ba03e219111..f4fe1daa4afd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -647,22 +647,35 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, } static struct mlx5_flow_handle * -mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, const unsigned char *addr, +mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, struct mlx5_esw_bridge *bridge) { struct mlx5_devcom *devcom = bridge->br_offloads->esw->dev->priv.devcom; + struct mlx5_eswitch *tmp, *peer_esw = NULL; static struct mlx5_flow_handle *handle; - struct mlx5_eswitch *peer_esw; + int i; - peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - if (!peer_esw) + if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) return ERR_PTR(-ENODEV); + mlx5_devcom_for_each_peer_entry(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + tmp, i) { + if (mlx5_esw_is_owner(tmp, vport_num, esw_owner_vhca_id)) { + peer_esw = tmp; + break; + } + } + if (!peer_esw) { + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return ERR_PTR(-ENODEV); + } + handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, bridge, peer_esw); - - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); return handle; } @@ -821,7 +834,7 @@ mlx5_esw_bridge_egress_miss_flow_create(struct mlx5_flow_table *egress_ft, return handle; } -static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, +static struct mlx5_esw_bridge *mlx5_esw_bridge_create(struct net_device *br_netdev, struct mlx5_esw_bridge_offloads *br_offloads) { struct mlx5_esw_bridge *bridge; @@ -845,11 +858,12 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, goto err_mdb_ht; INIT_LIST_HEAD(&bridge->fdb_list); - bridge->ifindex = ifindex; + bridge->ifindex = br_netdev->ifindex; bridge->refcnt = 1; bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME); bridge->vlan_proto = ETH_P_8021Q; list_add(&bridge->list, &br_offloads->bridges); + mlx5_esw_bridge_debugfs_init(br_netdev, bridge); return bridge; @@ -873,6 +887,7 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads, if (--bridge->refcnt) return; + mlx5_esw_bridge_debugfs_cleanup(bridge); mlx5_esw_bridge_egress_table_cleanup(bridge); mlx5_esw_bridge_mcast_disable(bridge); list_del(&bridge->list); @@ -885,14 +900,14 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads, } static struct mlx5_esw_bridge * -mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads) +mlx5_esw_bridge_lookup(struct net_device *br_netdev, struct mlx5_esw_bridge_offloads *br_offloads) { struct mlx5_esw_bridge *bridge; ASSERT_RTNL(); list_for_each_entry(bridge, &br_offloads->bridges, list) { - if (bridge->ifindex == ifindex) { + if (bridge->ifindex == br_netdev->ifindex) { mlx5_esw_bridge_get(bridge); return bridge; } @@ -905,7 +920,7 @@ mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads return ERR_PTR(err); } - bridge = mlx5_esw_bridge_create(ifindex, br_offloads); + bridge = mlx5_esw_bridge_create(br_netdev, br_offloads); if (IS_ERR(bridge) && list_empty(&br_offloads->bridges)) mlx5_esw_bridge_ingress_table_cleanup(br_offloads); return bridge; @@ -1369,8 +1384,9 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_ow entry->ingress_counter = counter; handle = peer ? - mlx5_esw_bridge_ingress_flow_peer_create(vport_num, addr, vlan, - mlx5_fc_id(counter), bridge) : + mlx5_esw_bridge_ingress_flow_peer_create(vport_num, esw_owner_vhca_id, + addr, vlan, mlx5_fc_id(counter), + bridge) : mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, mlx5_fc_id(counter), bridge); if (IS_ERR(handle)) { @@ -1587,15 +1603,15 @@ static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_off return 0; } -static int mlx5_esw_bridge_vport_link_with_flags(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, - u16 flags, +static int mlx5_esw_bridge_vport_link_with_flags(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, u16 flags, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack) { struct mlx5_esw_bridge *bridge; int err; - bridge = mlx5_esw_bridge_lookup(ifindex, br_offloads); + bridge = mlx5_esw_bridge_lookup(br_netdev, br_offloads); if (IS_ERR(bridge)) { NL_SET_ERR_MSG_MOD(extack, "Error checking for existing bridge with same ifindex"); return PTR_ERR(bridge); @@ -1613,15 +1629,16 @@ err_vport: return err; } -int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_link(struct net_device *br_netdev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack) { - return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, 0, + return mlx5_esw_bridge_vport_link_with_flags(br_netdev, vport_num, esw_owner_vhca_id, 0, br_offloads, extack); } -int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_unlink(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack) { @@ -1633,7 +1650,7 @@ int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_ NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge"); return -EINVAL; } - if (port->bridge->ifindex != ifindex) { + if (port->bridge->ifindex != br_netdev->ifindex) { NL_SET_ERR_MSG_MOD(extack, "Port is attached to another bridge"); return -EINVAL; } @@ -1644,23 +1661,25 @@ int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_ return err; } -int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_peer_link(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack) { if (!MLX5_CAP_ESW(br_offloads->esw->dev, merged_eswitch)) return 0; - return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, + return mlx5_esw_bridge_vport_link_with_flags(br_netdev, vport_num, esw_owner_vhca_id, MLX5_ESW_BRIDGE_PORT_FLAG_PEER, br_offloads, extack); } -int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_peer_unlink(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack) { - return mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id, br_offloads, + return mlx5_esw_bridge_vport_unlink(br_netdev, vport_num, esw_owner_vhca_id, br_offloads, extack); } @@ -1887,6 +1906,7 @@ struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw) xa_init(&br_offloads->ports); br_offloads->esw = esw; esw->br_offloads = br_offloads; + mlx5_esw_bridge_debugfs_offloads_init(br_offloads); return br_offloads; } @@ -1902,6 +1922,7 @@ void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw) mlx5_esw_bridge_flush(br_offloads); WARN_ON(!xa_empty(&br_offloads->ports)); + mlx5_esw_bridge_debugfs_offloads_cleanup(br_offloads); esw->br_offloads = NULL; kvfree(br_offloads); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h index a9dd18c73d6a..c2c7c70d99eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -10,6 +10,7 @@ #include <linux/xarray.h> #include "eswitch.h" +struct dentry; struct mlx5_flow_table; struct mlx5_flow_group; @@ -17,6 +18,7 @@ struct mlx5_esw_bridge_offloads { struct mlx5_eswitch *esw; struct list_head bridges; struct xarray ports; + struct dentry *debugfs_root; struct notifier_block netdev_nb; struct notifier_block nb_blk; @@ -43,16 +45,18 @@ struct mlx5_esw_bridge_offloads { struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw); void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw); -int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_link(struct net_device *br_netdev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack); -int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_unlink(struct net_device *br_netdev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack); -int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_peer_link(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack); -int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_peer_unlink(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack); void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c new file mode 100644 index 000000000000..dbd7cbe6cbf3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <linux/debugfs.h> +#include "bridge.h" +#include "bridge_priv.h" + +static void *mlx5_esw_bridge_debugfs_start(struct seq_file *seq, loff_t *pos); +static void *mlx5_esw_bridge_debugfs_next(struct seq_file *seq, void *v, loff_t *pos); +static void mlx5_esw_bridge_debugfs_stop(struct seq_file *seq, void *v); +static int mlx5_esw_bridge_debugfs_show(struct seq_file *seq, void *v); + +static const struct seq_operations mlx5_esw_bridge_debugfs_sops = { + .start = mlx5_esw_bridge_debugfs_start, + .next = mlx5_esw_bridge_debugfs_next, + .stop = mlx5_esw_bridge_debugfs_stop, + .show = mlx5_esw_bridge_debugfs_show, +}; +DEFINE_SEQ_ATTRIBUTE(mlx5_esw_bridge_debugfs); + +static void *mlx5_esw_bridge_debugfs_start(struct seq_file *seq, loff_t *pos) +{ + struct mlx5_esw_bridge *bridge = seq->private; + + rtnl_lock(); + return *pos ? seq_list_start(&bridge->fdb_list, *pos - 1) : SEQ_START_TOKEN; +} + +static void *mlx5_esw_bridge_debugfs_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct mlx5_esw_bridge *bridge = seq->private; + + return seq_list_next(v == SEQ_START_TOKEN ? &bridge->fdb_list : v, &bridge->fdb_list, pos); +} + +static void mlx5_esw_bridge_debugfs_stop(struct seq_file *seq, void *v) +{ + rtnl_unlock(); +} + +static int mlx5_esw_bridge_debugfs_show(struct seq_file *seq, void *v) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + u64 packets, bytes, lastuse; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "%-16s %-17s %4s %20s %20s %20s %5s\n", + "DEV", "MAC", "VLAN", "PACKETS", "BYTES", "LASTUSE", "FLAGS"); + return 0; + } + + entry = list_entry(v, struct mlx5_esw_bridge_fdb_entry, list); + mlx5_fc_query_cached_raw(entry->ingress_counter, &bytes, &packets, &lastuse); + seq_printf(seq, "%-16s %-17pM %4d %20llu %20llu %20llu %#5x\n", + entry->dev->name, entry->key.addr, entry->key.vid, packets, bytes, lastuse, + entry->flags); + return 0; +} + +void mlx5_esw_bridge_debugfs_init(struct net_device *br_netdev, struct mlx5_esw_bridge *bridge) +{ + if (!bridge->br_offloads->debugfs_root) + return; + + bridge->debugfs_dir = debugfs_create_dir(br_netdev->name, + bridge->br_offloads->debugfs_root); + debugfs_create_file("fdb", 0400, bridge->debugfs_dir, bridge, + &mlx5_esw_bridge_debugfs_fops); +} + +void mlx5_esw_bridge_debugfs_cleanup(struct mlx5_esw_bridge *bridge) +{ + debugfs_remove_recursive(bridge->debugfs_dir); + bridge->debugfs_dir = NULL; +} + +void mlx5_esw_bridge_debugfs_offloads_init(struct mlx5_esw_bridge_offloads *br_offloads) +{ + if (!br_offloads->esw->debugfs_root) + return; + + br_offloads->debugfs_root = debugfs_create_dir("bridge", br_offloads->esw->debugfs_root); +} + +void mlx5_esw_bridge_debugfs_offloads_cleanup(struct mlx5_esw_bridge_offloads *br_offloads) +{ + debugfs_remove_recursive(br_offloads->debugfs_root); + br_offloads->debugfs_root = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c index 2eae594a5e80..2455f8b93c1e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c @@ -540,16 +540,29 @@ static struct mlx5_flow_handle * mlx5_esw_bridge_mcast_filter_flow_peer_create(struct mlx5_esw_bridge_port *port) { struct mlx5_devcom *devcom = port->bridge->br_offloads->esw->dev->priv.devcom; + struct mlx5_eswitch *tmp, *peer_esw = NULL; static struct mlx5_flow_handle *handle; - struct mlx5_eswitch *peer_esw; + int i; - peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - if (!peer_esw) + if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) return ERR_PTR(-ENODEV); + mlx5_devcom_for_each_peer_entry(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + tmp, i) { + if (mlx5_esw_is_owner(tmp, port->vport_num, port->esw_owner_vhca_id)) { + peer_esw = tmp; + break; + } + } + if (!peer_esw) { + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return ERR_PTR(-ENODEV); + } + handle = mlx5_esw_bridge_mcast_flow_with_esw_create(port, peer_esw); - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); return handle; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h index c9595801bdb4..4911cc32161b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h @@ -199,6 +199,7 @@ struct mlx5_esw_bridge { int refcnt; struct list_head list; struct mlx5_esw_bridge_offloads *br_offloads; + struct dentry *debugfs_dir; struct list_head fdb_list; struct rhashtable fdb_ht; @@ -241,4 +242,9 @@ void mlx5_esw_bridge_port_mdb_vlan_flush(struct mlx5_esw_bridge_port *port, struct mlx5_esw_bridge_vlan *vlan); void mlx5_esw_bridge_mdb_flush(struct mlx5_esw_bridge *bridge); +void mlx5_esw_bridge_debugfs_offloads_init(struct mlx5_esw_bridge_offloads *br_offloads); +void mlx5_esw_bridge_debugfs_offloads_cleanup(struct mlx5_esw_bridge_offloads *br_offloads); +void mlx5_esw_bridge_debugfs_init(struct net_device *br_netdev, struct mlx5_esw_bridge *bridge); +void mlx5_esw_bridge_debugfs_cleanup(struct mlx5_esw_bridge *bridge); + #endif /* _MLX5_ESW_BRIDGE_PRIVATE_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 084a910bb4e7..2170539461fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -18,23 +18,21 @@ static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_ { return vport_num == MLX5_VPORT_UPLINK || (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) || - mlx5_eswitch_is_vf_vport(esw, vport_num); + mlx5_eswitch_is_vf_vport(esw, vport_num) || + mlx5_core_is_ec_vf_vport(esw->dev, vport_num); } -static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 vport_num) +static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch *esw, + u16 vport_num, + struct devlink_port *dl_port) { struct mlx5_core_dev *dev = esw->dev; struct devlink_port_attrs attrs = {}; struct netdev_phys_item_id ppid = {}; - struct devlink_port *dl_port; u32 controller_num = 0; bool external; u16 pfnum; - dl_port = kzalloc(sizeof(*dl_port), GFP_KERNEL); - if (!dl_port) - return NULL; - mlx5_esw_get_port_parent_id(dev, &ppid); pfnum = mlx5_get_dev_index(dev); external = mlx5_core_is_ecpf_esw_manager(dev); @@ -56,15 +54,57 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 dl_port->attrs.switch_id.id_len = ppid.id_len; devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, vport_num - 1, external); + } else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) { + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_vf_set(dl_port, 0, pfnum, + vport_num - 1, false); } - return dl_port; } -static void mlx5_esw_dl_port_free(struct devlink_port *dl_port) +int mlx5_esw_offloads_pf_vf_devlink_port_init(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct devlink_port *dl_port; + struct mlx5_vport *vport; + + if (!mlx5_esw_devlink_port_supported(esw, vport_num)) + return 0; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + dl_port = kzalloc(sizeof(*dl_port), GFP_KERNEL); + if (!dl_port) + return -ENOMEM; + + mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(esw, vport_num, dl_port); + + vport->dl_port = dl_port; + return 0; +} + +void mlx5_esw_offloads_pf_vf_devlink_port_cleanup(struct mlx5_eswitch *esw, u16 vport_num) { - kfree(dl_port); + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport) || !vport->dl_port) + return; + + kfree(vport->dl_port); + vport->dl_port = NULL; } +static const struct devlink_port_ops mlx5_esw_dl_port_ops = { + .port_fn_hw_addr_get = mlx5_devlink_port_fn_hw_addr_get, + .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set, + .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, + .port_fn_roce_set = mlx5_devlink_port_fn_roce_set, + .port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get, + .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set, +}; + int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_core_dev *dev = esw->dev; @@ -74,34 +114,29 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_ struct devlink *devlink; int err; - if (!mlx5_esw_devlink_port_supported(esw, vport_num)) - return 0; - vport = mlx5_eswitch_get_vport(esw, vport_num); if (IS_ERR(vport)) return PTR_ERR(vport); - dl_port = mlx5_esw_dl_port_alloc(esw, vport_num); + dl_port = vport->dl_port; if (!dl_port) - return -ENOMEM; + return 0; devlink = priv_to_devlink(dev); dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); - err = devl_port_register(devlink, dl_port, dl_port_index); + err = devl_port_register_with_ops(devlink, dl_port, dl_port_index, + &mlx5_esw_dl_port_ops); if (err) - goto reg_err; + return err; err = devl_rate_leaf_create(dl_port, vport, NULL); if (err) goto rate_err; - vport->dl_port = dl_port; return 0; rate_err: devl_port_unregister(dl_port); -reg_err: - mlx5_esw_dl_port_free(dl_port); return err; } @@ -109,11 +144,8 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo { struct mlx5_vport *vport; - if (!mlx5_esw_devlink_port_supported(esw, vport_num)) - return; - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) + if (IS_ERR(vport) || !vport->dl_port) return; if (vport->dl_port->devlink_rate) { @@ -122,8 +154,6 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo } devl_port_unregister(vport->dl_port); - mlx5_esw_dl_port_free(vport->dl_port); - vport->dl_port = NULL; } struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num) @@ -134,6 +164,20 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1 return IS_ERR(vport) ? ERR_CAST(vport) : vport->dl_port; } +static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = { +#ifdef CONFIG_MLX5_SF_MANAGER + .port_del = mlx5_devlink_sf_port_del, +#endif + .port_fn_hw_addr_get = mlx5_devlink_port_fn_hw_addr_get, + .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set, + .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, + .port_fn_roce_set = mlx5_devlink_port_fn_roce_set, +#ifdef CONFIG_MLX5_SF_MANAGER + .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get, + .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set, +#endif +}; + int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, u16 vport_num, u32 controller, u32 sfnum) { @@ -156,7 +200,8 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller); devlink = priv_to_devlink(dev); dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); - err = devl_port_register(devlink, dl_port, dl_port_index); + err = devl_port_register_with_ops(devlink, dl_port, dl_port_index, + &mlx5_esw_dl_sf_port_ops); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index fabe49a35a5c..255bc8b749f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -285,9 +285,8 @@ static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); goto out; - } else { - esw->fdb_table.legacy.vepa_uplink_rule = flow_rule; } + esw->fdb_table.legacy.vepa_uplink_rule = flow_rule; /* Star rule to forward all traffic to uplink vport */ memset(&dest, 0, sizeof(dest)); @@ -299,9 +298,8 @@ static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); goto out; - } else { - esw->fdb_table.legacy.vepa_star_rule = flow_rule; } + esw->fdb_table.legacy.vepa_star_rule = flow_rule; out: kvfree(spec); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 901c53751b0a..6e9b1b183190 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -31,6 +31,7 @@ */ #include <linux/etherdevice.h> +#include <linux/debugfs.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/mlx5_ifc.h> #include <linux/mlx5/vport.h> @@ -41,6 +42,7 @@ #include "esw/qos.h" #include "mlx5_core.h" #include "lib/eq.h" +#include "lag/lag.h" #include "eswitch.h" #include "fs_core.h" #include "devlink.h" @@ -92,7 +94,7 @@ mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_vport *vport; - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) + if (!esw) return ERR_PTR(-EPERM); vport = xa_load(&esw->vports, vport_num); @@ -113,7 +115,8 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + if (vport || mlx5_core_is_ecpf(dev)) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -309,11 +312,12 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) fdb_add: /* SRIOV is enabled: Forward UC MAC to vport */ - if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY) + if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY) { vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); - esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", - vport, mac, vaddr->flow_rule); + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", + vport, mac, vaddr->flow_rule); + } return 0; } @@ -710,6 +714,9 @@ void esw_vport_change_handle_locked(struct mlx5_vport *vport) struct mlx5_eswitch *esw = dev->priv.eswitch; u8 mac[ETH_ALEN]; + if (!MLX5_CAP_GEN(dev, log_max_l2_table)) + return; + mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac); esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", vport->vport, mac); @@ -800,6 +807,9 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport * hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, hca_caps, roce); + if (!MLX5_CAP_GEN_MAX(esw->dev, hca_cap_2)) + goto out_free; + memset(query_ctx, 0, query_out_sz); err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx, MLX5_CAP_GENERAL_2); @@ -946,7 +956,8 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) vport->enabled = false; /* Disable events from this vport */ - arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + if (MLX5_CAP_GEN(esw->dev, log_max_l2_table)) + arm_vport_context_events_cmd(esw->dev, vport->vport, 0); if (!mlx5_esw_is_manager_vport(esw, vport->vport) && MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) @@ -1045,6 +1056,18 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) } } +static void mlx5_eswitch_clear_ec_vf_vports_info(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + memset(&vport->qos, 0, sizeof(vport->qos)); + memset(&vport->info, 0, sizeof(vport->info)); + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + } +} + /* Public E-Switch API */ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, enum mlx5_eswitch_vport_event enabled_events) @@ -1055,7 +1078,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, if (err) return err; - err = esw_offloads_load_rep(esw, vport_num); + err = mlx5_esw_offloads_load_rep(esw, vport_num); if (err) goto err_rep; @@ -1068,10 +1091,35 @@ err_rep: void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num) { - esw_offloads_unload_rep(esw, vport_num); + mlx5_esw_offloads_unload_rep(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num); } +static int mlx5_eswitch_load_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events) +{ + int err; + + err = mlx5_esw_offloads_init_pf_vf_rep(esw, vport_num); + if (err) + return err; + + err = mlx5_eswitch_load_vport(esw, vport_num, enabled_events); + if (err) + goto err_load; + return 0; + +err_load: + mlx5_esw_offloads_cleanup_pf_vf_rep(esw, vport_num); + return err; +} + +static void mlx5_eswitch_unload_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + mlx5_eswitch_unload_vport(esw, vport_num); + mlx5_esw_offloads_cleanup_pf_vf_rep(esw, vport_num); +} + void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) { struct mlx5_vport *vport; @@ -1080,7 +1128,20 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { if (!vport->enabled) continue; - mlx5_eswitch_unload_vport(esw, vport->vport); + mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport); + } +} + +static void mlx5_eswitch_unload_ec_vf_vports(struct mlx5_eswitch *esw, + u16 num_ec_vfs) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) { + if (!vport->enabled) + continue; + mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport); } } @@ -1092,7 +1153,7 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, int err; mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { - err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events); + err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, enabled_events); if (err) goto vf_err; } @@ -1104,6 +1165,26 @@ vf_err: return err; } +static int mlx5_eswitch_load_ec_vf_vports(struct mlx5_eswitch *esw, u16 num_ec_vfs, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) { + err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, enabled_events); + if (err) + goto vf_err; + } + + return 0; + +vf_err: + mlx5_eswitch_unload_ec_vf_vports(esw, num_ec_vfs); + return err; +} + static int host_pf_enable_hca(struct mlx5_core_dev *dev) { if (!mlx5_core_is_ecpf(dev)) @@ -1131,12 +1212,19 @@ int mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, enum mlx5_eswitch_vport_event enabled_events) { + bool pf_needed; int ret; + pf_needed = mlx5_core_is_ecpf_esw_manager(esw->dev) || + esw->mode == MLX5_ESWITCH_LEGACY; + /* Enable PF vport */ - ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_PF, enabled_events); - if (ret) - return ret; + if (pf_needed) { + ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, + enabled_events); + if (ret) + return ret; + } /* Enable external host PF HCA */ ret = host_pf_enable_hca(esw->dev); @@ -1145,9 +1233,15 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, /* Enable ECPF vport */ if (mlx5_ecpf_vport_exists(esw->dev)) { - ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events); + ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs, + enabled_events); + if (ret) + goto ec_vf_err; + } } /* Enable VF vports */ @@ -1158,12 +1252,16 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, return 0; vf_err: + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); +ec_vf_err: if (mlx5_ecpf_vport_exists(esw->dev)) - mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); ecpf_err: host_pf_disable_hca(esw->dev); pf_hca_err: - mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); + if (pf_needed) + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); return ret; } @@ -1174,11 +1272,17 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); - if (mlx5_ecpf_vport_exists(esw->dev)) - mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); + if (mlx5_ecpf_vport_exists(esw->dev)) { + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); + } host_pf_disable_hca(esw->dev); - mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev) || + esw->mode == MLX5_ESWITCH_LEGACY) + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); } static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw) @@ -1219,6 +1323,9 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, host_params_context.host_num_of_vfs); + if (mlx5_core_ec_sriov_enabled(esw->dev)) + esw->esw_funcs.num_ec_vfs = num_vfs; + kvfree(out); } @@ -1326,9 +1433,9 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) mlx5_eswitch_event_handlers_register(esw); - esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n", + esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", - esw->esw_funcs.num_vfs, esw->enabled_vports); + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); mlx5_esw_mode_change_notify(esw, esw->mode); @@ -1350,7 +1457,7 @@ abort: int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { bool toggle_lag; - int ret; + int ret = 0; if (!mlx5_esw_allowed(esw)) return 0; @@ -1370,10 +1477,21 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) vport_events = (esw->mode == MLX5_ESWITCH_LEGACY) ? MLX5_LEGACY_SRIOV_VPORT_EVENTS : MLX5_VPORT_UC_ADDR_CHANGE; - ret = mlx5_eswitch_load_vf_vports(esw, num_vfs, vport_events); - if (!ret) - esw->esw_funcs.num_vfs = num_vfs; + /* If this is the ECPF the number of host VFs is managed via the + * eswitch function change event handler, and any num_vfs provided + * here are intended to be EC VFs. + */ + if (!mlx5_core_is_ecpf(esw->dev)) { + ret = mlx5_eswitch_load_vf_vports(esw, num_vfs, vport_events); + if (!ret) + esw->esw_funcs.num_vfs = num_vfs; + } else if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, num_vfs, vport_events); + if (!ret) + esw->esw_funcs.num_ec_vfs = num_vfs; + } } + up_write(&esw->mode_lock); if (toggle_lag) @@ -1393,16 +1511,22 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) /* If driver is unloaded, this function is called twice by remove_one() * and mlx5_unload(). Prevent the second call. */ - if (!esw->esw_funcs.num_vfs && !clear_vf) + if (!esw->esw_funcs.num_vfs && !esw->esw_funcs.num_ec_vfs && !clear_vf) goto unlock; - esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), active vports(%d)\n", + esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", - esw->esw_funcs.num_vfs, esw->enabled_vports); - - mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); - if (clear_vf) - mlx5_eswitch_clear_vf_vports_info(esw); + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); + + if (!mlx5_core_is_ecpf(esw->dev)) { + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (clear_vf) + mlx5_eswitch_clear_vf_vports_info(esw); + } else if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); + if (clear_vf) + mlx5_eswitch_clear_ec_vf_vports_info(esw); + } if (esw->mode == MLX5_ESWITCH_OFFLOADS) { struct devlink *devlink = priv_to_devlink(esw->dev); @@ -1413,7 +1537,10 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) if (esw->mode == MLX5_ESWITCH_LEGACY) mlx5_eswitch_disable_locked(esw); - esw->esw_funcs.num_vfs = 0; + if (!mlx5_core_is_ecpf(esw->dev)) + esw->esw_funcs.num_vfs = 0; + else + esw->esw_funcs.num_ec_vfs = 0; unlock: up_write(&esw->mode_lock); @@ -1433,9 +1560,9 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) mlx5_eswitch_event_handlers_unregister(esw); - esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", - esw->esw_funcs.num_vfs, esw->enabled_vports); + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); if (esw->fdb_table.flags & MLX5_ESW_FDB_CREATED) { esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED; @@ -1595,7 +1722,19 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) idx++; } - if (mlx5_ecpf_vport_exists(dev)) { + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + int ec_vf_base_num = mlx5_core_ec_vf_vport_base(dev); + + for (i = 0; i < mlx5_core_max_ec_vfs(esw->dev); i++) { + err = mlx5_esw_vport_alloc(esw, idx, ec_vf_base_num + i); + if (err) + goto err; + idx++; + } + } + + if (mlx5_ecpf_vport_exists(dev) || + mlx5_core_is_ecpf_esw_manager(dev)) { err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_ECPF); if (err) goto err; @@ -1611,22 +1750,60 @@ err: return err; } +static int mlx5_devlink_esw_multiport_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + if (ctx->val.vbool) + return mlx5_lag_mpesw_enable(dev); + + mlx5_lag_mpesw_disable(dev); + return 0; +} + +static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + ctx->val.vbool = mlx5_lag_is_mpesw(dev); + return 0; +} + +static const struct devlink_param mlx5_eswitch_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, + "esw_multiport", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_esw_multiport_get, + mlx5_devlink_esw_multiport_set, NULL), +}; + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw; int err; - if (!MLX5_VPORT_MANAGER(dev)) + if (!MLX5_VPORT_MANAGER(dev) && !MLX5_ESWITCH_MANAGER(dev)) return 0; esw = kzalloc(sizeof(*esw), GFP_KERNEL); if (!esw) return -ENOMEM; + err = devl_params_register(priv_to_devlink(dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); + if (err) + goto free_esw; + esw->dev = dev; esw->manager_vport = mlx5_eswitch_manager_vport(dev); esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev); + esw->debugfs_root = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(dev)); esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { err = -ENOMEM; @@ -1680,13 +1857,17 @@ reps_err: abort: if (esw->work_queue) destroy_workqueue(esw->work_queue); + debugfs_remove_recursive(esw->debugfs_root); + devl_params_unregister(priv_to_devlink(dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); +free_esw: kfree(esw); return err; } void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) { - if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) + if (!esw) return; esw_info(esw->dev, "cleanup\n"); @@ -1703,6 +1884,9 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) mutex_destroy(&esw->offloads.decap_tbl_lock); esw_offloads_cleanup(esw); mlx5_esw_vports_cleanup(esw); + debugfs_remove_recursive(esw->debugfs_root); + devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); kfree(esw); } @@ -1763,12 +1947,6 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, static bool mlx5_esw_check_port_type(struct mlx5_eswitch *esw, u16 vport_num, xa_mark_t mark) { - struct mlx5_vport *vport; - - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return false; - return xa_get_mark(&esw->vports, vport_num, mark); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index add6cfa432a5..56d9a261a5c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -123,8 +123,14 @@ struct vport_ingress { } offloads; }; +enum vport_egress_acl_type { + VPORT_EGRESS_ACL_TYPE_DEFAULT, + VPORT_EGRESS_ACL_TYPE_SHARED_FDB, +}; + struct vport_egress { struct mlx5_flow_table *acl; + enum vport_egress_acl_type type; struct mlx5_flow_handle *allowed_vlan; struct mlx5_flow_group *vlan_grp; union { @@ -136,7 +142,7 @@ struct vport_egress { struct { struct mlx5_flow_group *fwd_grp; struct mlx5_flow_handle *fwd_rule; - struct mlx5_flow_handle *bounce_rule; + struct xarray bounce_rules; struct mlx5_flow_group *bounce_grp; } offloads; }; @@ -218,7 +224,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *send_to_vport_meta_grp; struct mlx5_flow_group *peer_miss_grp; - struct mlx5_flow_handle **peer_miss_rules; + struct mlx5_flow_handle **peer_miss_rules[MLX5_MAX_PORTS]; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle **send_to_vport_meta_rules; struct mlx5_flow_handle *miss_rule_uni; @@ -249,7 +255,7 @@ struct mlx5_esw_offload { struct mlx5_flow_group *vport_rx_drop_group; struct mlx5_flow_handle *vport_rx_drop_rule; struct xarray vport_reps; - struct list_head peer_flows; + struct list_head peer_flows[MLX5_MAX_PORTS]; struct mutex peer_mutex; struct mutex encap_tbl_lock; /* protects encap_tbl */ DECLARE_HASHTABLE(encap_tbl, 8); @@ -283,6 +289,7 @@ struct mlx5_host_work { struct mlx5_esw_functions { struct mlx5_nb nb; u16 num_vfs; + u16 num_ec_vfs; }; enum { @@ -297,6 +304,8 @@ enum { MLX5_ESW_FDB_CREATED = BIT(0), }; +struct dentry; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -305,6 +314,7 @@ struct mlx5_eswitch { struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct esw_mc_addr mc_promisc; /* end of legacy */ + struct dentry *debugfs_root; struct workqueue_struct *work_queue; struct xarray vports; u32 flags; @@ -337,12 +347,13 @@ struct mlx5_eswitch { int mode; u16 manager_vport; u16 first_host_vport; + u8 num_peers; struct mlx5_esw_functions esw_funcs; struct { u32 large_group_num; } params; struct blocking_notifier_head n_head; - bool paired[MLX5_MAX_PORTS]; + struct xarray paired; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -506,12 +517,12 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, enum devlink_eswitch_encap_mode *encap); -int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, - u8 *hw_addr, int *hw_addr_len, - struct netlink_ext_ack *extack); -int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, - const u8 *hw_addr, int hw_addr_len, - struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_hw_addr_get(struct devlink_port *port, + u8 *hw_addr, int *hw_addr_len, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_hw_addr_set(struct devlink_port *port, + const u8 *hw_addr, int hw_addr_len, + struct netlink_ext_ack *extack); int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled, struct netlink_ext_ack *extack); int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable, @@ -578,6 +589,13 @@ mlx5_esw_is_manager_vport(const struct mlx5_eswitch *esw, u16 vport_num) return esw->manager_vport == vport_num; } +static inline bool mlx5_esw_is_owner(struct mlx5_eswitch *esw, u16 vport_num, + u16 esw_owner_vhca_id) +{ + return esw_owner_vhca_id == MLX5_CAP_GEN(esw->dev, vhca_id) || + (vport_num == MLX5_VPORT_UPLINK && mlx5_lag_is_master(esw->dev)); +} + static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev) { return mlx5_core_is_ecpf_esw_manager(dev) ? @@ -640,6 +658,19 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); #define mlx5_esw_for_each_host_func_vport(esw, index, vport, last) \ mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_HOST_FN) +/* This macro should only be used if EC SRIOV is enabled. + * + * Because there were no more marks available on the xarray this uses a + * for_each_range approach. The range is only valid when EC SRIOV is enabled + */ +#define mlx5_esw_for_each_ec_vf_vport(esw, index, vport, last) \ + xa_for_each_range(&((esw)->vports), \ + index, \ + vport, \ + MLX5_CAP_GEN_2((esw->dev), ec_vf_vport_base), \ + MLX5_CAP_GEN_2((esw->dev), ec_vf_vport_base) +\ + (last) - 1) + struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink); struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); @@ -686,8 +717,18 @@ mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr struct mlx5_flow_handle * esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag); -int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num); -void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_set_flow_group_source_port(struct mlx5_eswitch *esw, + u32 *flow_group_in, + int match_params); + +void mlx5_esw_set_spec_source_port(struct mlx5_eswitch *esw, + u16 vport, + struct mlx5_flow_spec *spec); + +int mlx5_esw_offloads_init_pf_vf_rep(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_offloads_cleanup_pf_vf_rep(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num); int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num); void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num); @@ -700,6 +741,8 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, enum mlx5_eswitch_vport_event enabled_events); void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); +int mlx5_esw_offloads_pf_vf_devlink_port_init(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_offloads_pf_vf_devlink_port_cleanup(struct mlx5_eswitch *esw, u16 vport_num); int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num); void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); @@ -740,9 +783,9 @@ void esw_vport_change_handle_locked(struct mlx5_vport *vport); bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller); -int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, - struct mlx5_eswitch *slave_esw); -void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, +int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw, int max_slaves); +void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw); int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); @@ -757,6 +800,13 @@ static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw) return 0; } +static inline int mlx5_eswitch_get_npeers(struct mlx5_eswitch *esw) +{ + if (mlx5_esw_allowed(esw)) + return esw->num_peers; + return 0; +} + static inline struct mlx5_flow_table * mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw) { @@ -794,16 +844,18 @@ mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev, } static inline int -mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, - struct mlx5_eswitch *slave_esw) +mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw, int max_slaves) { return 0; } static inline void -mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, +mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw) {} +static inline int mlx5_eswitch_get_npeers(struct mlx5_eswitch *esw) { return 0; } + static inline int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8d19c20d3447..1ad5a72dcc3f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -55,13 +55,6 @@ #define mlx5_esw_for_each_rep(esw, i, rep) \ xa_for_each(&((esw)->offloads.vport_reps), i, rep) -#define mlx5_esw_for_each_sf_rep(esw, i, rep) \ - xa_for_each_marked(&((esw)->offloads.vport_reps), i, rep, MLX5_ESW_VPT_SF) - -#define mlx5_esw_for_each_vf_rep(esw, index, rep) \ - mlx5_esw_for_each_entry_marked(&((esw)->offloads.vport_reps), index, \ - rep, (esw)->esw_funcs.num_vfs, MLX5_ESW_VPT_VF) - /* There are two match-all miss flows, one for unicast dst mac and * one for multicast. */ @@ -838,6 +831,7 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; void *misc; + u16 vport; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { @@ -847,20 +841,43 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); - /* source vport is the esw manager */ - MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport); - if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(from_esw->dev, vhca_id)); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + /* source vport is the esw manager */ + vport = from_esw->manager_vport; + + if (mlx5_eswitch_vport_match_metadata_enabled(on_esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(from_esw, vport)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(from_esw->dev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport.num = rep->vport; dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id); @@ -1052,6 +1069,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, void *misc; int err; + if (!MLX5_VPORT_MANAGER(esw->dev) && !mlx5_core_is_ecpf_esw_manager(esw->dev)) + return 0; + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return -ENOMEM; @@ -1108,11 +1128,32 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows[vport->index] = flow; } - esw->fdb_table.offloads.peer_miss_rules = flows; + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + if (i >= mlx5_core_max_ec_vfs(peer_dev)) + break; + esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, + spec, vport->vport); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ec_vf_flow_err; + } + flows[vport->index] = flow; + } + } + esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)] = flows; kvfree(spec); return 0; +add_ec_vf_flow_err: + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + if (!flows[vport->index]) + continue; + mlx5_del_flow_rules(flows[vport->index]); + } add_vf_flow_err: mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { if (!flows[vport->index]) @@ -1136,13 +1177,28 @@ alloc_flows_err: return err; } -static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) +static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev) { + u16 peer_index = mlx5_get_dev_index(peer_dev); struct mlx5_flow_handle **flows; struct mlx5_vport *vport; unsigned long i; - flows = esw->fdb_table.offloads.peer_miss_rules; + flows = esw->fdb_table.offloads.peer_miss_rules[peer_index]; + if (!flows) + return; + + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + /* The flow for a particular vport could be NULL if the other ECPF + * has fewer or no VFs enabled + */ + if (!flows[vport->index]) + continue; + mlx5_del_flow_rules(flows[vport->index]); + } + } mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) mlx5_del_flow_rules(flows[vport->index]); @@ -1156,7 +1212,9 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); mlx5_del_flow_rules(flows[vport->index]); } + kvfree(flows); + esw->fdb_table.offloads.peer_miss_rules[peer_index] = NULL; } static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) @@ -1269,8 +1327,10 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 -static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, - u32 *flow_group_in) +void +mlx5_esw_set_flow_group_source_port(struct mlx5_eswitch *esw, + u32 *flow_group_in, + int match_params) { void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, @@ -1279,7 +1339,7 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS_2); + MLX5_MATCH_MISC_PARAMETERS_2 | match_params); MLX5_SET(fte_match_param, match_criteria, misc_parameters_2.metadata_reg_c_0, @@ -1287,7 +1347,7 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, } else { MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); + MLX5_MATCH_MISC_PARAMETERS | match_params); MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); @@ -1376,7 +1436,6 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) esw_init_chains_offload_flags(esw, &attr.flags); attr.ns = MLX5_FLOW_NAMESPACE_FDB; - attr.fs_base_prio = FDB_TC_OFFLOAD; attr.max_grp_num = esw->params.large_group_num; attr.default_ft = miss_fdb; attr.mapping = esw->offloads.reg_c0_obj_pool; @@ -1463,14 +1522,13 @@ esw_create_send_to_vport_group(struct mlx5_eswitch *esw, memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); + mlx5_esw_set_flow_group_source_port(esw, flow_group_in, MLX5_MATCH_MISC_PARAMETERS); match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW(esw->dev, merged_eswitch)) { MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_eswitch_owner_vhca_id); MLX5_SET(create_flow_group_in, flow_group_in, @@ -1548,6 +1606,7 @@ esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw, u32 *flow_group_in, int *ix) { + int max_peer_ports = (esw->total_vports - 1) * (MLX5_MAX_PORTS - 1); int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; void *match_criteria; @@ -1558,7 +1617,7 @@ esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw, memset(flow_group_in, 0, inlen); - esw_set_flow_group_source_port(esw, flow_group_in); + mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0); if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { match_criteria = MLX5_ADDR_OF(create_flow_group_in, @@ -1574,8 +1633,8 @@ esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, - *ix + esw->total_vports - 1); - *ix += esw->total_vports; + *ix + max_peer_ports); + *ix += max_peer_ports + 1; g = mlx5_create_flow_group(fdb, flow_group_in); if (IS_ERR(g)) { @@ -1677,7 +1736,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) * total vports of the peer (currently is also uses esw->total_vports). */ table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) + - esw->total_vports * 2 + MLX5_ESW_MISS_FLOWS; + esw->total_vports * MLX5_MAX_PORTS + MLX5_ESW_MISS_FLOWS; /* create the slow path fdb with encap set, so further table instances * can be created at run time while VFs are probed if the FW allows that. @@ -1844,8 +1903,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) if (!flow_group_in) return -ENOMEM; - /* create vport rx group */ - esw_set_flow_group_source_port(esw, flow_group_in); + mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); @@ -1915,21 +1973,13 @@ static void esw_destroy_vport_rx_drop_group(struct mlx5_eswitch *esw) mlx5_destroy_flow_group(esw->offloads.vport_rx_drop_group); } -struct mlx5_flow_handle * -mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, - struct mlx5_flow_destination *dest) +void +mlx5_esw_set_spec_source_port(struct mlx5_eswitch *esw, + u16 vport, + struct mlx5_flow_spec *spec) { - struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_handle *flow_rule; - struct mlx5_flow_spec *spec; void *misc; - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) { - flow_rule = ERR_PTR(-ENOMEM); - goto out; - } - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, @@ -1949,6 +1999,23 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; } +} + +struct mlx5_flow_handle * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + mlx5_esw_set_spec_source_port(esw, vport, spec); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, @@ -2142,6 +2209,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, "Failed setting eswitch to offloads"); esw->mode = MLX5_ESWITCH_LEGACY; mlx5_rescan_drivers(esw->dev); + return err; } if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { if (mlx5_eswitch_inline_mode_get(esw, @@ -2151,19 +2219,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, "Inline mode is different between vports"); } } - return err; -} - -static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep, - xa_mark_t mark) -{ - bool mark_set; - - /* Copy the mark from vport to its rep */ - mark_set = xa_get_mark(&esw->vports, rep->vport, mark); - if (mark_set) - xa_set_mark(&esw->offloads.vport_reps, rep->vport, mark); + return 0; } static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport) @@ -2185,9 +2241,6 @@ static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx if (err) goto insert_err; - mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_HOST_FN); - mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_VF); - mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_SF); return 0; insert_err: @@ -2328,37 +2381,13 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, esw->offloads.rep_ops[rep_type]->unload(rep); } -static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ - struct mlx5_eswitch_rep *rep; - unsigned long i; - - mlx5_esw_for_each_sf_rep(esw, i, rep) - __esw_offloads_unload_rep(esw, rep, rep_type); -} - static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; unsigned long i; - __unload_reps_sf_vport(esw, rep_type); - - mlx5_esw_for_each_vf_rep(esw, i, rep) - __esw_offloads_unload_rep(esw, rep, rep_type); - - if (mlx5_ecpf_vport_exists(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); - __esw_offloads_unload_rep(esw, rep, rep_type); - } - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + mlx5_esw_for_each_rep(esw, i, rep) __esw_offloads_unload_rep(esw, rep, rep_type); - } - - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - __esw_offloads_unload_rep(esw, rep, rep_type); } int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) @@ -2395,7 +2424,23 @@ void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num) __esw_offloads_unload_rep(esw, rep, rep_type); } -int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num) +int mlx5_esw_offloads_init_pf_vf_rep(struct mlx5_eswitch *esw, u16 vport_num) +{ + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; + + return mlx5_esw_offloads_pf_vf_devlink_port_init(esw, vport_num); +} + +void mlx5_esw_offloads_cleanup_pf_vf_rep(struct mlx5_eswitch *esw, u16 vport_num) +{ + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return; + + mlx5_esw_offloads_pf_vf_devlink_port_cleanup(esw, vport_num); +} + +int mlx5_esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num) { int err; @@ -2419,7 +2464,7 @@ load_err: return err; } -void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) +void mlx5_esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) { if (esw->mode != MLX5_ESWITCH_OFFLOADS) return; @@ -2476,6 +2521,7 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master, struct mlx5_vport *vport, struct mlx5_flow_table *acl) { + u16 slave_index = MLX5_CAP_GEN(slave, vhca_id); struct mlx5_flow_handle *flow_rule = NULL; struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {}; @@ -2491,8 +2537,7 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); - MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(slave, vhca_id)); + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, slave_index); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); @@ -2507,49 +2552,43 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master, flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act, &dest, 1); - if (IS_ERR(flow_rule)) + if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); - else - vport->egress.offloads.bounce_rule = flow_rule; + } else { + err = xa_insert(&vport->egress.offloads.bounce_rules, + slave_index, flow_rule, GFP_KERNEL); + if (err) + mlx5_del_flow_rules(flow_rule); + } kvfree(spec); return err; } -static int esw_set_master_egress_rule(struct mlx5_core_dev *master, - struct mlx5_core_dev *slave) +static int esw_master_egress_create_resources(struct mlx5_eswitch *esw, + struct mlx5_flow_namespace *egress_ns, + struct mlx5_vport *vport, size_t count) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_eswitch *esw = master->priv.eswitch; struct mlx5_flow_table_attr ft_attr = { - .max_fte = 1, .prio = 0, .level = 0, - .flags = MLX5_FLOW_TABLE_OTHER_VPORT, + .max_fte = count, .prio = 0, .level = 0, }; - struct mlx5_flow_namespace *egress_ns; struct mlx5_flow_table *acl; struct mlx5_flow_group *g; - struct mlx5_vport *vport; void *match_criteria; u32 *flow_group_in; int err; - vport = mlx5_eswitch_get_vport(esw, esw->manager_vport); - if (IS_ERR(vport)) - return PTR_ERR(vport); - - egress_ns = mlx5_get_flow_vport_acl_namespace(master, - MLX5_FLOW_NAMESPACE_ESW_EGRESS, - vport->index); - if (!egress_ns) - return -EINVAL; - if (vport->egress.acl) - return -EINVAL; + return 0; flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; + if (vport->vport || mlx5_core_is_ecpf(esw->dev)) + ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT; + acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport); if (IS_ERR(acl)) { err = PTR_ERR(acl); @@ -2568,7 +2607,7 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master, MLX5_SET(create_flow_group_in, flow_group_in, source_eswitch_owner_vhca_id_valid, 1); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, count); g = mlx5_create_flow_group(acl, flow_group_in); if (IS_ERR(g)) { @@ -2576,19 +2615,15 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master, goto err_group; } - err = __esw_set_master_egress_rule(master, slave, vport, acl); - if (err) - goto err_rule; - vport->egress.acl = acl; vport->egress.offloads.bounce_grp = g; + vport->egress.type = VPORT_EGRESS_ACL_TYPE_SHARED_FDB; + xa_init_flags(&vport->egress.offloads.bounce_rules, XA_FLAGS_ALLOC); kvfree(flow_group_in); return 0; -err_rule: - mlx5_destroy_flow_group(g); err_group: mlx5_destroy_flow_table(acl); out: @@ -2596,18 +2631,74 @@ out: return err; } -static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev) +static void esw_master_egress_destroy_resources(struct mlx5_vport *vport) +{ + if (!xa_empty(&vport->egress.offloads.bounce_rules)) + return; + mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp); + vport->egress.offloads.bounce_grp = NULL; + mlx5_destroy_flow_table(vport->egress.acl); + vport->egress.acl = NULL; +} + +static int esw_set_master_egress_rule(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave, size_t count) +{ + struct mlx5_eswitch *esw = master->priv.eswitch; + u16 slave_index = MLX5_CAP_GEN(slave, vhca_id); + struct mlx5_flow_namespace *egress_ns; + struct mlx5_vport *vport; + int err; + + vport = mlx5_eswitch_get_vport(esw, esw->manager_vport); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + egress_ns = mlx5_get_flow_vport_acl_namespace(master, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + vport->index); + if (!egress_ns) + return -EINVAL; + + if (vport->egress.acl && vport->egress.type != VPORT_EGRESS_ACL_TYPE_SHARED_FDB) + return 0; + + err = esw_master_egress_create_resources(esw, egress_ns, vport, count); + if (err) + return err; + + if (xa_load(&vport->egress.offloads.bounce_rules, slave_index)) + return -EINVAL; + + err = __esw_set_master_egress_rule(master, slave, vport, vport->egress.acl); + if (err) + goto err_rule; + + return 0; + +err_rule: + esw_master_egress_destroy_resources(vport); + return err; +} + +static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev, + struct mlx5_core_dev *slave_dev) { struct mlx5_vport *vport; vport = mlx5_eswitch_get_vport(dev->priv.eswitch, dev->priv.eswitch->manager_vport); - esw_acl_egress_ofld_cleanup(vport); + esw_acl_egress_ofld_bounce_rule_destroy(vport, MLX5_CAP_GEN(slave_dev, vhca_id)); + + if (xa_empty(&vport->egress.offloads.bounce_rules)) { + esw_acl_egress_ofld_cleanup(vport); + xa_destroy(&vport->egress.offloads.bounce_rules); + } } -int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, - struct mlx5_eswitch *slave_esw) +int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw, int max_slaves) { int err; @@ -2617,7 +2708,7 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, return err; err = esw_set_master_egress_rule(master_esw->dev, - slave_esw->dev); + slave_esw->dev, max_slaves); if (err) goto err_acl; @@ -2625,21 +2716,21 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, err_acl: esw_set_slave_root_fdb(NULL, slave_esw->dev); - return err; } -void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, +void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw) { - esw_unset_master_egress_rule(master_esw->dev); esw_set_slave_root_fdb(NULL, slave_esw->dev); + esw_unset_master_egress_rule(master_esw->dev, slave_esw->dev); } #define ESW_OFFLOADS_DEVCOM_PAIR (0) #define ESW_OFFLOADS_DEVCOM_UNPAIR (1) -static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw) +static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) { const struct mlx5_eswitch_rep_ops *ops; struct mlx5_eswitch_rep *rep; @@ -2652,18 +2743,19 @@ static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw) ops = esw->offloads.rep_ops[rep_type]; if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && ops->event) - ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL); + ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, peer_esw); } } } -static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) +static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) { #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) mlx5e_tc_clean_fdb_peer_flows(esw); #endif - mlx5_esw_offloads_rep_event_unpair(esw); - esw_del_fdb_peer_miss_rules(esw); + mlx5_esw_offloads_rep_event_unpair(esw, peer_esw); + esw_del_fdb_peer_miss_rules(esw, peer_esw->dev); } static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, @@ -2694,7 +2786,7 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, return 0; err_out: - mlx5_esw_offloads_unpair(esw); + mlx5_esw_offloads_unpair(esw, peer_esw); return err; } @@ -2702,6 +2794,8 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, struct mlx5_eswitch *peer_esw, bool pair) { + u16 peer_vhca_id = MLX5_CAP_GEN(peer_esw->dev, vhca_id); + u16 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); struct mlx5_flow_root_namespace *peer_ns; struct mlx5_flow_root_namespace *ns; int err; @@ -2710,18 +2804,18 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, ns = esw->dev->priv.steering->fdb_root_ns; if (pair) { - err = mlx5_flow_namespace_set_peer(ns, peer_ns); + err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_vhca_id); if (err) return err; - err = mlx5_flow_namespace_set_peer(peer_ns, ns); + err = mlx5_flow_namespace_set_peer(peer_ns, ns, vhca_id); if (err) { - mlx5_flow_namespace_set_peer(ns, NULL); + mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id); return err; } } else { - mlx5_flow_namespace_set_peer(ns, NULL); - mlx5_flow_namespace_set_peer(peer_ns, NULL); + mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id); + mlx5_flow_namespace_set_peer(peer_ns, NULL, vhca_id); } return 0; @@ -2734,15 +2828,21 @@ static int mlx5_esw_offloads_devcom_event(int event, struct mlx5_eswitch *esw = my_data; struct mlx5_devcom *devcom = esw->dev->priv.devcom; struct mlx5_eswitch *peer_esw = event_data; + u16 esw_i, peer_esw_i; + bool esw_paired; int err; + peer_esw_i = MLX5_CAP_GEN(peer_esw->dev, vhca_id); + esw_i = MLX5_CAP_GEN(esw->dev, vhca_id); + esw_paired = !!xa_load(&esw->paired, peer_esw_i); + switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: if (mlx5_eswitch_vport_match_metadata_enabled(esw) != mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) break; - if (esw->paired[mlx5_get_dev_index(peer_esw->dev)]) + if (esw_paired) break; err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true); @@ -2756,28 +2856,43 @@ static int mlx5_esw_offloads_devcom_event(int event, if (err) goto err_pair; - esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true; - peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true; - mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true); + err = xa_insert(&esw->paired, peer_esw_i, peer_esw, GFP_KERNEL); + if (err) + goto err_xa; + + err = xa_insert(&peer_esw->paired, esw_i, esw, GFP_KERNEL); + if (err) + goto err_peer_xa; + + esw->num_peers++; + peer_esw->num_peers++; + mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true); break; case ESW_OFFLOADS_DEVCOM_UNPAIR: - if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)]) + if (!esw_paired) break; - mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); - esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false; - peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false; - mlx5_esw_offloads_unpair(peer_esw); - mlx5_esw_offloads_unpair(esw); + peer_esw->num_peers--; + esw->num_peers--; + if (!esw->num_peers && !peer_esw->num_peers) + mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); + xa_erase(&peer_esw->paired, esw_i); + xa_erase(&esw->paired, peer_esw_i); + mlx5_esw_offloads_unpair(peer_esw, esw); + mlx5_esw_offloads_unpair(esw, peer_esw); mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); break; } return 0; +err_peer_xa: + xa_erase(&esw->paired, peer_esw_i); +err_xa: + mlx5_esw_offloads_unpair(peer_esw, esw); err_pair: - mlx5_esw_offloads_unpair(esw); + mlx5_esw_offloads_unpair(esw, peer_esw); err_peer: mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); err_out: @@ -2789,24 +2904,29 @@ err_out: void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) { struct mlx5_devcom *devcom = esw->dev->priv.devcom; + int i; - INIT_LIST_HEAD(&esw->offloads.peer_flows); + for (i = 0; i < MLX5_MAX_PORTS; i++) + INIT_LIST_HEAD(&esw->offloads.peer_flows[i]); mutex_init(&esw->offloads.peer_mutex); if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) return; - if (!mlx5_is_lag_supported(esw->dev)) + if (!mlx5_lag_is_supported(esw->dev)) return; + xa_init(&esw->paired); mlx5_devcom_register_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS, mlx5_esw_offloads_devcom_event, esw); + esw->num_peers = 0; mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, - ESW_OFFLOADS_DEVCOM_PAIR, esw); + ESW_OFFLOADS_DEVCOM_PAIR, + ESW_OFFLOADS_DEVCOM_UNPAIR, esw); } void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) @@ -2816,13 +2936,15 @@ void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) return; - if (!mlx5_is_lag_supported(esw->dev)) + if (!mlx5_lag_is_supported(esw->dev)) return; mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, + ESW_OFFLOADS_DEVCOM_UNPAIR, ESW_OFFLOADS_DEVCOM_UNPAIR, esw); mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + xa_destroy(&esw->paired); } bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) @@ -2834,9 +2956,6 @@ bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) MLX5_FDB_TO_VPORT_REG_C_0)) return false; - if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) - return false; - return true; } @@ -2973,26 +3092,47 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, esw_acl_ingress_ofld_cleanup(esw, vport); } -static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) { - struct mlx5_vport *vport; + struct mlx5_vport *uplink, *manager; + int ret; - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); - if (IS_ERR(vport)) - return PTR_ERR(vport); + uplink = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(uplink)) + return PTR_ERR(uplink); + + ret = esw_vport_create_offloads_acl_tables(esw, uplink); + if (ret) + return ret; - return esw_vport_create_offloads_acl_tables(esw, vport); + manager = mlx5_eswitch_get_vport(esw, esw->manager_vport); + if (IS_ERR(manager)) { + ret = PTR_ERR(manager); + goto err_manager; + } + + ret = esw_vport_create_offloads_acl_tables(esw, manager); + if (ret) + goto err_manager; + + return 0; + +err_manager: + esw_vport_destroy_offloads_acl_tables(esw, uplink); + return ret; } -static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); - if (IS_ERR(vport)) - return; + vport = mlx5_eswitch_get_vport(esw, esw->manager_vport); + if (!IS_ERR(vport)) + esw_vport_destroy_offloads_acl_tables(esw, vport); - esw_vport_destroy_offloads_acl_tables(esw, vport); + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (!IS_ERR(vport)) + esw_vport_destroy_offloads_acl_tables(esw, vport); } int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) @@ -3037,7 +3177,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) } esw->fdb_table.offloads.indir = indir; - err = esw_create_uplink_offloads_acl_tables(esw); + err = esw_create_offloads_acl_tables(esw); if (err) goto create_acl_err; @@ -3078,7 +3218,7 @@ create_fdb_err: create_restore_err: esw_destroy_offloads_table(esw); create_offloads_err: - esw_destroy_uplink_offloads_acl_tables(esw); + esw_destroy_offloads_acl_tables(esw); create_acl_err: mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir); create_indir_err: @@ -3094,7 +3234,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_offloads_fdb_tables(esw); esw_destroy_restore_table(esw); esw_destroy_offloads_table(esw); - esw_destroy_uplink_offloads_acl_tables(esw); + esw_destroy_offloads_acl_tables(esw); mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir); mutex_destroy(&esw->fdb_table.offloads.vports.lock); } @@ -3247,9 +3387,12 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) /* Representor will control the vport link state */ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; /* Uplink vport rep must load first. */ - err = esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK); + err = mlx5_esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK); if (err) goto err_uplink; @@ -3260,7 +3403,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) return 0; err_vports: - esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); + mlx5_esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); err_uplink: esw_offloads_steering_cleanup(esw); err_steering_init: @@ -3285,7 +3428,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, /* If changing from switchdev to legacy mode without sriov enabled, * no need to create legacy fdb. */ - if (!mlx5_sriov_is_enabled(esw->dev)) + if (!mlx5_core_is_pf(esw->dev) || !mlx5_sriov_is_enabled(esw->dev)) return 0; err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS); @@ -3298,7 +3441,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_disable(struct mlx5_eswitch *esw) { mlx5_eswitch_disable_pf_vf_vports(esw); - esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); + mlx5_esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); mapping_destroy(esw->offloads.reg_c0_obj_pool); @@ -3484,8 +3627,27 @@ static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode, goto revert_inline_mode; } } + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode); + if (err) { + err_vport_num = vport->vport; + NL_SET_ERR_MSG_MOD(extack, + "Failed to set min inline on vport"); + goto revert_ec_vf_inline_mode; + } + } + } return 0; +revert_ec_vf_inline_mode: + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + if (vport->vport == err_vport_num) + break; + mlx5_modify_nic_vport_min_inline(dev, + vport->vport, + esw->offloads.inline_mode); + } revert_inline_mode: mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) { if (vport->vport == err_vport_num) @@ -3835,9 +3997,6 @@ static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, int err; *vhca_id = 0; - if (mlx5_esw_is_manager_vport(esw, vport_num) || - !MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) - return -EPERM; query_ctx = kzalloc(query_out_sz, GFP_KERNEL); if (!query_ctx) @@ -3926,9 +4085,9 @@ is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num) mlx5_esw_is_sf_vport(esw, vport_num); } -int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, - u8 *hw_addr, int *hw_addr_len, - struct netlink_ext_ack *extack) +int mlx5_devlink_port_fn_hw_addr_get(struct devlink_port *port, + u8 *hw_addr, int *hw_addr_len, + struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw; struct mlx5_vport *vport; @@ -3955,9 +4114,9 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, return 0; } -int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, - const u8 *hw_addr, int hw_addr_len, - struct netlink_ext_ack *extack) +int mlx5_devlink_port_fn_hw_addr_set(struct devlink_port *port, + const u8 *hw_addr, int hw_addr_len, + struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw; u16 vport_num; @@ -4073,7 +4232,7 @@ int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1); + MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, enable); err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 718cf09c28ce..3ec892d51f57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -5,7 +5,7 @@ #include "mlx5_core.h" #include "lib/eq.h" -#include "lib/mlx5.h" +#include "lib/events.h" struct mlx5_event_nb { struct mlx5_nb nb; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index ec83e6483d1a..244cfd470903 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -139,7 +139,8 @@ static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace } static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_root_namespace *peer_ns) + struct mlx5_flow_root_namespace *peer_ns, + u16 peer_vhca_id) { return 0; } @@ -243,16 +244,30 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, ft->type == FS_FT_FDB && mlx5_lag_is_shared_fdb(dev) && mlx5_lag_is_master(dev)) { - err = mlx5_cmd_set_slave_root_fdb(dev, - mlx5_lag_get_peer_mdev(dev), - !disconnect, (!disconnect) ? - ft->id : 0); - if (err && !disconnect) { - MLX5_SET(set_flow_table_root_in, in, op_mod, 0); - MLX5_SET(set_flow_table_root_in, in, table_id, - ns->root_ft->id); - mlx5_cmd_exec_in(dev, set_flow_table_root, in); + struct mlx5_core_dev *peer_dev; + int i, j; + + mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) { + err = mlx5_cmd_set_slave_root_fdb(dev, peer_dev, !disconnect, + (!disconnect) ? ft->id : 0); + if (err && !disconnect) { + mlx5_lag_for_each_peer_mdev(dev, peer_dev, j) { + if (j < i) + mlx5_cmd_set_slave_root_fdb(dev, peer_dev, 1, + ns->root_ft->id); + else + break; + } + + MLX5_SET(set_flow_table_root_in, in, op_mod, 0); + MLX5_SET(set_flow_table_root_in, in, table_id, + ns->root_ft->id); + mlx5_cmd_exec_in(dev, set_flow_table_root, in); + } + if (err) + break; } + } return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 8ef4254b9ea1..7790ae5531e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -93,7 +93,8 @@ struct mlx5_flow_cmds { struct mlx5_modify_hdr *modify_hdr); int (*set_peer)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_root_namespace *peer_ns); + struct mlx5_flow_root_namespace *peer_ns, + u16 peer_vhca_id); int (*create_ns)(struct mlx5_flow_root_namespace *ns); int (*destroy_ns)(struct mlx5_flow_root_namespace *ns); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 19da02c41616..6b069fa411c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -889,7 +889,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, struct fs_node *iter = list_entry(start, struct fs_node, list); struct mlx5_flow_table *ft = NULL; - if (!root || root->type == FS_TYPE_PRIO_CHAINS) + if (!root) return NULL; list_for_each_advance_continue(iter, &root->children, reverse) { @@ -905,20 +905,42 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, return ft; } -/* If reverse is false then return the first flow table in next priority of - * prio in the tree, else return the last flow table in the previous priority - * of prio in the tree. +static struct fs_node *find_prio_chains_parent(struct fs_node *parent, + struct fs_node **child) +{ + struct fs_node *node = NULL; + + while (parent && parent->type != FS_TYPE_PRIO_CHAINS) { + node = parent; + parent = parent->parent; + } + + if (child) + *child = node; + + return parent; +} + +/* If reverse is false then return the first flow table next to the passed node + * in the tree, else return the last flow table before the node in the tree. + * If skip is true, skip the flow tables in the same prio_chains prio. */ -static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse) +static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse, + bool skip) { + struct fs_node *prio_chains_parent = NULL; struct mlx5_flow_table *ft = NULL; struct fs_node *curr_node; struct fs_node *parent; - parent = prio->node.parent; - curr_node = &prio->node; + if (skip) + prio_chains_parent = find_prio_chains_parent(node, NULL); + parent = node->parent; + curr_node = node; while (!ft && parent) { - ft = find_closest_ft_recursive(parent, &curr_node->list, reverse); + if (parent != prio_chains_parent) + ft = find_closest_ft_recursive(parent, &curr_node->list, + reverse); curr_node = parent; parent = curr_node->parent; } @@ -926,15 +948,15 @@ static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool revers } /* Assuming all the tree is locked by mutex chain lock */ -static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio) +static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node) { - return find_closest_ft(prio, false); + return find_closest_ft(node, false, true); } /* Assuming all the tree is locked by mutex chain lock */ -static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio) +static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node) { - return find_closest_ft(prio, true); + return find_closest_ft(node, true, true); } static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft, @@ -946,7 +968,7 @@ static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft, next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent); - return find_next_chained_ft(prio); + return find_next_chained_ft(&prio->node); } static int connect_fts_in_prio(struct mlx5_core_dev *dev, @@ -970,21 +992,55 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev, return 0; } +static struct mlx5_flow_table *find_closet_ft_prio_chains(struct fs_node *node, + struct fs_node *parent, + struct fs_node **child, + bool reverse) +{ + struct mlx5_flow_table *ft; + + ft = find_closest_ft(node, reverse, false); + + if (ft && parent == find_prio_chains_parent(&ft->node, child)) + return ft; + + return NULL; +} + /* Connect flow tables from previous priority of prio to ft */ static int connect_prev_fts(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct fs_prio *prio) { + struct fs_node *prio_parent, *parent = NULL, *child, *node; struct mlx5_flow_table *prev_ft; + int err = 0; + + prio_parent = find_prio_chains_parent(&prio->node, &child); + + /* return directly if not under the first sub ns of prio_chains prio */ + if (prio_parent && !list_is_first(&child->list, &prio_parent->children)) + return 0; - prev_ft = find_prev_chained_ft(prio); - if (prev_ft) { + prev_ft = find_prev_chained_ft(&prio->node); + while (prev_ft) { struct fs_prio *prev_prio; fs_get_obj(prev_prio, prev_ft->node.parent); - return connect_fts_in_prio(dev, prev_prio, ft); + err = connect_fts_in_prio(dev, prev_prio, ft); + if (err) + break; + + if (!parent) { + parent = find_prio_chains_parent(&prev_prio->node, &child); + if (!parent) + break; + } + + node = child; + prev_ft = find_closet_ft_prio_chains(node, parent, &child, true); } - return 0; + return err; } static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio @@ -1123,7 +1179,7 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table if (err) return err; - next_ft = first_ft ? first_ft : find_next_chained_ft(prio); + next_ft = first_ft ? first_ft : find_next_chained_ft(&prio->node); err = connect_fwd_rules(dev, ft, next_ft); if (err) return err; @@ -1198,7 +1254,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); next_ft = unmanaged ? ft_attr->next_ft : - find_next_chained_ft(fs_prio); + find_next_chained_ft(&fs_prio->node); ft->def_miss_action = ns->def_miss_action; ft->ns = ns; err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft); @@ -2195,13 +2251,20 @@ EXPORT_SYMBOL(mlx5_del_flow_rules); /* Assuming prio->node.children(flow tables) is sorted by level */ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) { + struct fs_node *prio_parent, *child; struct fs_prio *prio; fs_get_obj(prio, ft->node.parent); if (!list_is_last(&ft->node.list, &prio->node.children)) return list_next_entry(ft, node.list); - return find_next_chained_ft(prio); + + prio_parent = find_prio_chains_parent(&prio->node, &child); + + if (prio_parent && list_is_first(&child->list, &prio_parent->children)) + return find_closest_ft(&prio->node, false, false); + + return find_next_chained_ft(&prio->node); } static int update_root_ft_destroy(struct mlx5_flow_table *ft) @@ -3620,7 +3683,8 @@ void mlx5_destroy_match_definer(struct mlx5_core_dev *dev, } int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_root_namespace *peer_ns) + struct mlx5_flow_root_namespace *peer_ns, + u16 peer_vhca_id) { if (peer_ns && ns->mode != peer_ns->mode) { mlx5_core_err(ns->dev, @@ -3628,7 +3692,7 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, return -EINVAL; } - return ns->cmds->set_peer(ns, peer_ns); + return ns->cmds->set_peer(ns, peer_ns, peer_vhca_id); } /* This function should be called only at init stage of the namespace. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index b043190e50a8..4aed1768b85f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -302,7 +302,8 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_root_namespace *peer_ns); + struct mlx5_flow_root_namespace *peer_ns, + u16 peer_vhca_id); int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, enum mlx5_flow_steering_mode mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 7bb7be01225a..fb2035a5ec99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -196,14 +196,11 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } - if (MLX5_CAP_GEN(dev, vport_group_manager) && - MLX5_ESWITCH_MANAGER(dev)) { + if (MLX5_ESWITCH_MANAGER(dev)) { err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE); if (err) return err; - } - if (MLX5_ESWITCH_MANAGER(dev)) { err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 50022e7565f1..99dcbd006357 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -21,6 +21,7 @@ struct mlx5_fw_reset { struct workqueue_struct *wq; struct work_struct fw_live_patch_work; struct work_struct reset_request_work; + struct work_struct reset_unload_work; struct work_struct reset_reload_work; struct work_struct reset_now_work; struct work_struct reset_abort_work; @@ -30,6 +31,26 @@ struct mlx5_fw_reset { int ret; }; +enum { + MLX5_FW_RST_STATE_IDLE = 0, + MLX5_FW_RST_STATE_TOGGLE_REQ = 4, +}; + +enum { + MLX5_RST_STATE_BIT_NUM = 12, + MLX5_RST_ACK_BIT_NUM = 22, +}; + +static u8 mlx5_get_fw_rst_state(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->initializing) >> MLX5_RST_STATE_BIT_NUM) & 0xF; +} + +static void mlx5_set_fw_rst_ack(struct mlx5_core_dev *dev) +{ + iowrite32be(BIT(MLX5_RST_ACK_BIT_NUM), &dev->iseg->initializing); +} + static int mlx5_fw_reset_enable_remote_dev_reset_set(struct devlink *devlink, u32 id, struct devlink_param_gset_ctx *ctx) { @@ -155,7 +176,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false); } -static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) +static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; @@ -163,7 +184,8 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { complete(&fw_reset->done); } else { - mlx5_unload_one(dev, false); + if (!unloaded) + mlx5_unload_one(dev, false); if (mlx5_health_wait_pci_up(dev)) mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else @@ -204,7 +226,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work) mlx5_sync_reset_clear_reset_requested(dev, false); mlx5_enter_error_state(dev, true); - mlx5_fw_reset_complete_reload(dev); + mlx5_fw_reset_complete_reload(dev, false); } #define MLX5_RESET_POLL_INTERVAL (HZ / 10) @@ -276,6 +298,44 @@ static void mlx5_fw_live_patch_event(struct work_struct *work) mlx5_core_err(dev, "Failed to reload FW tracer\n"); } +static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id) +{ + struct pci_bus *bridge_bus = dev->pdev->bus; + struct pci_dev *sdev; + u16 sdev_id; + int err; + + /* Check that all functions under the pci bridge are PFs of + * this device otherwise fail this function. + */ + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id); + if (err) + return err; + if (sdev_id != dev_id) { + mlx5_core_warn(dev, "unrecognized dev_id (0x%x)\n", sdev_id); + return -EPERM; + } + } + return 0; +} + +static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev) +{ + u16 dev_id; + int err; + + if (!MLX5_CAP_GEN(dev, fast_teardown)) { + mlx5_core_warn(dev, "fast teardown is not supported by firmware\n"); + return false; + } + + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); + if (err) + return false; + return (!mlx5_check_dev_ids(dev, dev_id)); +} + static void mlx5_sync_reset_request_event(struct work_struct *work) { struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, @@ -283,7 +343,8 @@ static void mlx5_sync_reset_request_event(struct work_struct *work) struct mlx5_core_dev *dev = fw_reset->dev; int err; - if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags)) { + if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) || + !mlx5_is_reset_now_capable(dev)) { err = mlx5_fw_reset_set_reset_sync_nack(dev); mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s", err ? "Failed" : "Sent"); @@ -303,26 +364,17 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) { struct pci_bus *bridge_bus = dev->pdev->bus; struct pci_dev *bridge = bridge_bus->self; - u16 reg16, dev_id, sdev_id; unsigned long timeout; struct pci_dev *sdev; + u16 reg16, dev_id; int cap, err; - u32 reg32; - /* Check that all functions under the pci bridge are PFs of - * this device otherwise fail this function. - */ err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); if (err) return err; - list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { - err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id); - if (err) - return err; - if (sdev_id != dev_id) - return -EPERM; - } - + err = mlx5_check_dev_ids(dev, dev_id); + if (err) + return err; cap = pci_find_capability(bridge, PCI_CAP_ID_EXP); if (!cap) return -EOPNOTSUPP; @@ -332,25 +384,17 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) pci_cfg_access_lock(sdev); } /* PCI link toggle */ - err = pci_read_config_word(bridge, cap + PCI_EXP_LNKCTL, ®16); - if (err) - return err; - reg16 |= PCI_EXP_LNKCTL_LD; - err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16); + err = pcie_capability_set_word(bridge, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LD); if (err) return err; msleep(500); - reg16 &= ~PCI_EXP_LNKCTL_LD; - err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16); + err = pcie_capability_clear_word(bridge, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LD); if (err) return err; /* Check link */ - err = pci_read_config_dword(bridge, cap + PCI_EXP_LNKCAP, ®32); - if (err) - return err; - if (!(reg32 & PCI_EXP_LNKCAP_DLLLARC)) { - mlx5_core_warn(dev, "No PCI link reporting capability (0x%08x)\n", reg32); + if (!bridge->link_active_reporting) { + mlx5_core_warn(dev, "No PCI link reporting capability\n"); msleep(1000); goto restore; } @@ -427,7 +471,70 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) mlx5_enter_error_state(dev, true); done: fw_reset->ret = err; - mlx5_fw_reset_complete_reload(dev); + mlx5_fw_reset_complete_reload(dev, false); +} + +static void mlx5_sync_reset_unload_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset; + struct mlx5_core_dev *dev; + unsigned long timeout; + bool reset_action; + u8 rst_state; + int err; + + fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work); + dev = fw_reset->dev; + + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; + + mlx5_core_warn(dev, "Sync Reset Unload. Function is forced down.\n"); + + err = mlx5_cmd_fast_teardown_hca(dev); + if (err) + mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err); + else + mlx5_enter_error_state(dev, true); + + if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) + mlx5_unload_one_devl_locked(dev, false); + else + mlx5_unload_one(dev, false); + + mlx5_set_fw_rst_ack(dev); + mlx5_core_warn(dev, "Sync Reset Unload done, device reset expected\n"); + + reset_action = false; + timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, RESET_UNLOAD)); + do { + rst_state = mlx5_get_fw_rst_state(dev); + if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ || + rst_state == MLX5_FW_RST_STATE_IDLE) { + reset_action = true; + break; + } + msleep(20); + } while (!time_after(jiffies, timeout)); + + if (!reset_action) { + mlx5_core_err(dev, "Got timeout waiting for sync reset action, state = %u\n", + rst_state); + fw_reset->ret = -ETIMEDOUT; + goto done; + } + + mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", rst_state); + if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) { + err = mlx5_pci_link_toggle(dev); + if (err) { + mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, err %d\n", err); + fw_reset->ret = err; + } + } + +done: + mlx5_fw_reset_complete_reload(dev, true); } static void mlx5_sync_reset_abort_event(struct work_struct *work) @@ -452,6 +559,9 @@ static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct case MLX5_SYNC_RST_STATE_RESET_REQUEST: queue_work(fw_reset->wq, &fw_reset->reset_request_work); break; + case MLX5_SYNC_RST_STATE_RESET_UNLOAD: + queue_work(fw_reset->wq, &fw_reset->reset_unload_work); + break; case MLX5_SYNC_RST_STATE_RESET_NOW: queue_work(fw_reset->wq, &fw_reset->reset_now_work); break; @@ -486,10 +596,13 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) { unsigned long pci_sync_update_timeout = mlx5_tout_ms(dev, PCI_SYNC_UPDATE); - unsigned long timeout = msecs_to_jiffies(pci_sync_update_timeout); struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + unsigned long timeout; int err; + if (MLX5_CAP_GEN(dev, pci_sync_for_fw_update_with_driver_unload)) + pci_sync_update_timeout += mlx5_tout_ms(dev, RESET_UNLOAD); + timeout = msecs_to_jiffies(pci_sync_update_timeout); if (!wait_for_completion_timeout(&fw_reset->done, timeout)) { mlx5_core_warn(dev, "FW sync reset timeout after %lu seconds\n", pci_sync_update_timeout / 1000); @@ -526,6 +639,7 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags); cancel_work_sync(&fw_reset->fw_live_patch_work); cancel_work_sync(&fw_reset->reset_request_work); + cancel_work_sync(&fw_reset->reset_unload_work); cancel_work_sync(&fw_reset->reset_reload_work); cancel_work_sync(&fw_reset->reset_now_work); cancel_work_sync(&fw_reset->reset_abort_work); @@ -564,6 +678,7 @@ int mlx5_fw_reset_init(struct mlx5_core_dev *dev) INIT_WORK(&fw_reset->fw_live_patch_work, mlx5_fw_live_patch_event); INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event); + INIT_WORK(&fw_reset->reset_unload_work, mlx5_sync_reset_unload_event); INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work); INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event); INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 871c32dda66e..187cb2c464f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -39,6 +39,7 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/mlx5.h" +#include "lib/events.h" #include "lib/pci_vsc.h" #include "lib/tout.h" #include "diag/fw_tracer.h" @@ -719,7 +720,7 @@ static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { #define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000 #define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD -static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) +void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct devlink *devlink = priv_to_devlink(dev); @@ -735,17 +736,17 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) } health->fw_reporter = - devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, - 0, dev); + devl_health_reporter_create(devlink, &mlx5_fw_reporter_ops, + 0, dev); if (IS_ERR(health->fw_reporter)) mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", PTR_ERR(health->fw_reporter)); health->fw_fatal_reporter = - devlink_health_reporter_create(devlink, - &mlx5_fw_fatal_reporter_ops, - grace_period, - dev); + devl_health_reporter_create(devlink, + &mlx5_fw_fatal_reporter_ops, + grace_period, + dev); if (IS_ERR(health->fw_fatal_reporter)) mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", PTR_ERR(health->fw_fatal_reporter)); @@ -777,7 +778,8 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - queue_work(health->wq, &health->fatal_report_work); + if (!mlx5_dev_is_lightweight(dev)) + queue_work(health->wq, &health->fatal_report_work); } #define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60) @@ -905,10 +907,15 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) int mlx5_health_init(struct mlx5_core_dev *dev) { + struct devlink *devlink = priv_to_devlink(dev); struct mlx5_core_health *health; char *name; - mlx5_fw_reporters_create(dev); + if (!mlx5_dev_is_lightweight(dev)) { + devl_lock(devlink); + mlx5_fw_reporters_create(dev); + devl_unlock(devlink); + } mlx5_reporter_vnic_create(dev); health = &dev->priv.health; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 5d331b940f4d..f0a074b2fcdf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -512,8 +512,11 @@ static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev, return; if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) && - tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) + tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) { + if (ldev->ports > 2) + ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS; set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags); + } } static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, @@ -550,6 +553,29 @@ char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) } } +static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_eswitch *master_esw = dev0->priv.eswitch; + int err; + int i; + + for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) { + struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch; + + err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw, + slave_esw, ldev->ports); + if (err) + goto err; + } + return 0; +err: + for (; i > MLX5_LAG_P1; i--) + mlx5_eswitch_offloads_single_fdb_del_one(master_esw, + ldev->pf[i].dev->priv.eswitch); + return err; +} + static int mlx5_create_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker, enum mlx5_lag_mode mode, @@ -557,7 +583,6 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, { bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; int err; @@ -575,8 +600,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, } if (shared_fdb) { - err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch, - dev1->priv.eswitch); + err = mlx5_lag_create_single_fdb(ldev); if (err) mlx5_core_err(dev0, "Can't enable single FDB mode\n"); else @@ -647,19 +671,21 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, int mlx5_deactivate_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + struct mlx5_eswitch *master_esw = dev0->priv.eswitch; u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; bool roce_lag = __mlx5_lag_is_roce(ldev); unsigned long flags = ldev->mode_flags; int err; + int i; ldev->mode = MLX5_LAG_MODE_NONE; ldev->mode_flags = 0; mlx5_lag_mp_reset(ldev); if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { - mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch, - dev1->priv.eswitch); + for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) + mlx5_eswitch_offloads_single_fdb_del_one(master_esw, + ldev->pf[i].dev->priv.eswitch); clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); } @@ -685,7 +711,7 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) return 0; } -#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2 +#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 4 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { #ifdef CONFIG_MLX5_ESWITCH @@ -711,7 +737,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) return false; - if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS) + if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports > MLX5_LAG_OFFLOADS_SUPPORTED_PORTS) return false; #else for (i = 0; i < ldev->ports; i++) @@ -759,7 +785,6 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) { bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; bool roce_lag; int err; int i; @@ -784,28 +809,35 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) if (shared_fdb || roce_lag) mlx5_lag_add_devices(ldev); - if (shared_fdb) { - if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) - mlx5_eswitch_reload_reps(dev0->priv.eswitch); - if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) - mlx5_eswitch_reload_reps(dev1->priv.eswitch); - } + if (shared_fdb) + for (i = 0; i < ldev->ports; i++) + if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); } -bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) +static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; - - if (is_mdev_switchdev_mode(dev0) && - is_mdev_switchdev_mode(dev1) && - mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) && - mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) && - mlx5_devcom_is_paired(dev0->priv.devcom, - MLX5_DEVCOM_ESW_OFFLOADS) && - MLX5_CAP_GEN(dev1, lag_native_fdb_selection) && - MLX5_CAP_ESW(dev1, root_ft_on_other_esw) && - MLX5_CAP_ESW(dev0, esw_shared_ingress_acl)) + struct mlx5_core_dev *dev; + int i; + + for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + if (is_mdev_switchdev_mode(dev) && + mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && + MLX5_CAP_GEN(dev, lag_native_fdb_selection) && + MLX5_CAP_ESW(dev, root_ft_on_other_esw) && + mlx5_eswitch_get_npeers(dev->priv.eswitch) == + MLX5_CAP_GEN(dev, num_lag_ports) - 1) + continue; + return false; + } + + dev = ldev->pf[MLX5_LAG_P1].dev; + if (is_mdev_switchdev_mode(dev) && + mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && + mlx5_devcom_comp_is_ready(dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS) && + MLX5_CAP_ESW(dev, esw_shared_ingress_acl) && + mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1) return true; return false; @@ -842,7 +874,6 @@ static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond) static void mlx5_do_bond(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; struct lag_tracker tracker = { }; bool do_bond, roce_lag; int err; @@ -883,20 +914,24 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) for (i = 1; i < ldev->ports; i++) mlx5_nic_vport_enable_roce(ldev->pf[i].dev); } else if (shared_fdb) { + int i; + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); - err = mlx5_eswitch_reload_reps(dev0->priv.eswitch); - if (!err) - err = mlx5_eswitch_reload_reps(dev1->priv.eswitch); + for (i = 0; i < ldev->ports; i++) { + err = mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); + if (err) + break; + } if (err) { dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); mlx5_deactivate_lag(ldev); mlx5_lag_add_devices(ldev); - mlx5_eswitch_reload_reps(dev0->priv.eswitch); - mlx5_eswitch_reload_reps(dev1->priv.eswitch); + for (i = 0; i < ldev->ports; i++) + mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); mlx5_core_err(dev0, "Failed to enable lag\n"); return; } @@ -1233,14 +1268,21 @@ recheck: mlx5_ldev_put(ldev); } +bool mlx5_lag_is_supported(struct mlx5_core_dev *dev) +{ + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + MLX5_CAP_GEN(dev, num_lag_ports) < 2 || + MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS) + return false; + return true; +} + void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) { int err; - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - !MLX5_CAP_GEN(dev, lag_master) || - (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS || - MLX5_CAP_GEN(dev, num_lag_ports) <= 1)) + if (!mlx5_lag_is_supported(dev)) return; recheck: @@ -1496,26 +1538,37 @@ u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_get_num_ports); -struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) +struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i) { struct mlx5_core_dev *peer_dev = NULL; struct mlx5_lag *ldev; unsigned long flags; + int idx; spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); if (!ldev) goto unlock; - peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ? - ldev->pf[MLX5_LAG_P2].dev : - ldev->pf[MLX5_LAG_P1].dev; + if (*i == ldev->ports) + goto unlock; + for (idx = *i; idx < ldev->ports; idx++) + if (ldev->pf[idx].dev != dev) + break; + + if (idx == ldev->ports) { + *i = idx; + goto unlock; + } + *i = idx + 1; + + peer_dev = ldev->pf[idx].dev; unlock: spin_unlock_irqrestore(&lag_lock, flags); return peer_dev; } -EXPORT_SYMBOL(mlx5_lag_get_peer_mdev); +EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev); int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, u64 *values, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index bc1f1dd3e283..a061b1873e27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -74,15 +74,7 @@ struct mlx5_lag { struct lag_mpesw lag_mpesw; }; -static inline bool mlx5_is_lag_supported(struct mlx5_core_dev *dev) -{ - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - !MLX5_CAP_GEN(dev, lag_master) || - MLX5_CAP_GEN(dev, num_lag_ports) < 2 || - MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS) - return false; - return true; -} +bool mlx5_lag_is_supported(struct mlx5_core_dev *dev); static inline struct mlx5_lag * mlx5_lag_dev(struct mlx5_core_dev *dev) @@ -111,7 +103,6 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, bool shared_fdb); int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, struct net_device *ndev); -bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev); char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags); void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index d85a8dfc153d..b1aa494c76ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -7,13 +7,14 @@ #include "lag/mp.h" #include "mlx5_core.h" #include "eswitch.h" -#include "lib/mlx5.h" +#include "lib/events.h" static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev) { return ldev->mode == MLX5_LAG_MODE_MULTIPATH; } +#define MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS 2 static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) { if (!mlx5_lag_is_ready(ldev)) @@ -22,6 +23,9 @@ static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev)) return false; + if (ldev->ports > MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS) + return false; + return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev, ldev->pf[MLX5_LAG_P2].dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 0c0ef600f643..4bf15391525c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -6,7 +6,7 @@ #include "lag/lag.h" #include "eswitch.h" #include "esw/acl/ofld.h" -#include "lib/mlx5.h" +#include "lib/events.h" static void mlx5_mpesw_metadata_cleanup(struct mlx5_lag *ldev) { @@ -65,6 +65,7 @@ err_metadata: return err; } +#define MLX5_LAG_MPESW_OFFLOADS_SUPPORTED_PORTS 2 static int enable_mpesw(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; @@ -74,6 +75,9 @@ static int enable_mpesw(struct mlx5_lag *ldev) if (ldev->mode != MLX5_LAG_MODE_NONE) return -EINVAL; + if (ldev->ports > MLX5_LAG_MPESW_OFFLOADS_SUPPORTED_PORTS) + return -EOPNOTSUPP; + if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS || !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) || !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) || diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index d3a3fe4ce670..7d9bbb494d95 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -574,7 +574,7 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, for (i = 0; i < ldev->ports; i++) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; - if (ldev->v2p_map[i] == ports[i]) + if (ldev->v2p_map[idx] == ports[idx]) continue; dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 932fbc843c69..aa29f09e8356 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -32,6 +32,7 @@ #include <linux/clocksource.h> #include <linux/highmem.h> +#include <linux/log2.h> #include <linux/ptp_clock_kernel.h> #include <rdma/mlx5-abi.h> #include "lib/eq.h" @@ -39,10 +40,6 @@ #include "clock.h" enum { - MLX5_CYCLES_SHIFT = 31 -}; - -enum { MLX5_PIN_MODE_IN = 0x0, MLX5_PIN_MODE_OUT = 0x1, }; @@ -93,17 +90,48 @@ static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev) return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify); } -static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta) +static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz) { - s64 min = MLX5_MTUTC_OPERATION_ADJUST_TIME_MIN; - s64 max = MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; + /* Optimal shift constant leads to corrections above just 1 scaled ppm. + * + * Two sets of equations are needed to derive the optimal shift + * constant for the cyclecounter. + * + * dev_freq_khz * 1000 / 2^shift_constant = 1 scaled_ppm + * ppb = scaled_ppm * 1000 / 2^16 + * + * Using the two equations together + * + * dev_freq_khz * 1000 / 1 scaled_ppm = 2^shift_constant + * dev_freq_khz * 2^16 / 1 ppb = 2^shift_constant + * dev_freq_khz = 2^(shift_constant - 16) + * + * then yields + * + * shift_constant = ilog2(dev_freq_khz) + 16 + */ - if (MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range)) { - min = MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MIN; - max = MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX; - } + return min(ilog2(dev_freq_khz) + 16, + ilog2((U32_MAX / NSEC_PER_MSEC) * dev_freq_khz)); +} + +static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + + return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ? + MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX : + MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; +} + +static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta) +{ + s64 max = mlx5_ptp_getmaxphase(&mdev->clock.ptp_info); - if (delta < min || delta > max) + if (delta < -max || delta > max) return false; return true; @@ -221,10 +249,15 @@ static void mlx5_timestamp_overflow(struct work_struct *work) clock = container_of(timer, struct mlx5_clock, timer); mdev = container_of(clock, struct mlx5_core_dev, clock); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto out; + write_seqlock_irqsave(&clock->lock, flags); timecounter_read(&timer->tc); mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); + +out: schedule_delayed_work(&timer->overflow_work, timer->overflow_period); } @@ -351,14 +384,6 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - struct mlx5_core_dev *mdev; - - mdev = container_of(clock, struct mlx5_core_dev, clock); - - if (!mlx5_is_mtutc_time_adj_cap(mdev, delta)) - return -ERANGE; - return mlx5_ptp_adjtime(ptp, delta); } @@ -734,6 +759,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = { .pps = 0, .adjfine = mlx5_ptp_adjfine, .adjphase = mlx5_ptp_adjphase, + .getmaxphase = mlx5_ptp_getmaxphase, .adjtime = mlx5_ptp_adjtime, .gettimex64 = mlx5_ptp_gettimex, .settime64 = mlx5_ptp_settime, @@ -905,7 +931,7 @@ static void mlx5_timecounter_init(struct mlx5_core_dev *mdev) dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz); timer->cycles.read = read_internal_timer; - timer->cycles.shift = MLX5_CYCLES_SHIFT; + timer->cycles.shift = mlx5_ptp_shift_constant(dev_freq); timer->cycles.mult = clocksource_khz2mult(dev_freq, timer->cycles.shift); timer->nominal_c_mult = timer->cycles.mult; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index b7d779d08d83..78c94b22bdc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -19,7 +19,7 @@ struct mlx5_devcom_component { mlx5_devcom_event_handler_t handler; struct rw_semaphore sem; - bool paired; + bool ready; }; struct mlx5_devcom_list { @@ -75,13 +75,14 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return NULL; - if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED) + if (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_DEVCOM_PORTS_SUPPORTED) return NULL; mlx5_dev_list_lock(); sguid0 = mlx5_query_nic_system_image_guid(dev); list_for_each_entry(iter, &devcom_list, list) { - struct mlx5_core_dev *tmp_dev = NULL; + /* There is at least one device in iter */ + struct mlx5_core_dev *tmp_dev; idx = -1; for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) { @@ -193,7 +194,7 @@ void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, int mlx5_devcom_send_event(struct mlx5_devcom *devcom, enum mlx5_devcom_components id, - int event, + int event, int rollback_event, void *event_data) { struct mlx5_devcom_component *comp; @@ -210,84 +211,134 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom, if (i != devcom->idx && data) { err = comp->handler(event, data, event_data); - break; + if (err) + goto rollback; } } up_write(&comp->sem); + return 0; + +rollback: + while (i--) { + void *data = rcu_dereference_protected(comp->device[i].data, + lockdep_is_held(&comp->sem)); + + if (i != devcom->idx && data) + comp->handler(rollback_event, data, event_data); + } + + up_write(&comp->sem); return err; } -void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, - bool paired) +void mlx5_devcom_comp_set_ready(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool ready) { struct mlx5_devcom_component *comp; comp = &devcom->priv->components[id]; WARN_ON(!rwsem_is_locked(&comp->sem)); - WRITE_ONCE(comp->paired, paired); + WRITE_ONCE(comp->ready, ready); } -bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id) +bool mlx5_devcom_comp_is_ready(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) { if (IS_ERR_OR_NULL(devcom)) return false; - return READ_ONCE(devcom->priv->components[id].paired); + return READ_ONCE(devcom->priv->components[id].ready); } -void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id) +bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) { struct mlx5_devcom_component *comp; - int i; if (IS_ERR_OR_NULL(devcom)) - return NULL; + return false; comp = &devcom->priv->components[id]; down_read(&comp->sem); - if (!READ_ONCE(comp->paired)) { + if (!READ_ONCE(comp->ready)) { up_read(&comp->sem); - return NULL; + return false; } - for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) - if (i != devcom->idx) - break; + return true; +} + +void mlx5_devcom_for_each_peer_end(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp = &devcom->priv->components[id]; - return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem)); + up_read(&comp->sem); } -void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id) +void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int *i) { struct mlx5_devcom_component *comp; - int i; + void *ret; + int idx; - if (IS_ERR_OR_NULL(devcom)) - return NULL; + comp = &devcom->priv->components[id]; - for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) - if (i != devcom->idx) - break; + if (*i == MLX5_DEVCOM_PORTS_SUPPORTED) + return NULL; + for (idx = *i; idx < MLX5_DEVCOM_PORTS_SUPPORTED; idx++) { + if (idx != devcom->idx) { + ret = rcu_dereference_protected(comp->device[idx].data, + lockdep_is_held(&comp->sem)); + if (ret) + break; + } + } - comp = &devcom->priv->components[id]; - /* This can change concurrently, however 'data' pointer will remain - * valid for the duration of RCU read section. - */ - if (!READ_ONCE(comp->paired)) + if (idx == MLX5_DEVCOM_PORTS_SUPPORTED) { + *i = idx; return NULL; + } + *i = idx + 1; - return rcu_dereference(comp->device[i].data); + return ret; } -void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id) +void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int *i) { - struct mlx5_devcom_component *comp = &devcom->priv->components[id]; + struct mlx5_devcom_component *comp; + void *ret; + int idx; - up_read(&comp->sem); + comp = &devcom->priv->components[id]; + + if (*i == MLX5_DEVCOM_PORTS_SUPPORTED) + return NULL; + for (idx = *i; idx < MLX5_DEVCOM_PORTS_SUPPORTED; idx++) { + if (idx != devcom->idx) { + /* This can change concurrently, however 'data' pointer will remain + * valid for the duration of RCU read section. + */ + if (!READ_ONCE(comp->ready)) + return NULL; + ret = rcu_dereference(comp->device[idx].data); + if (ret) + break; + } + } + + if (idx == MLX5_DEVCOM_PORTS_SUPPORTED) { + *i = idx; + return NULL; + } + *i = idx + 1; + + return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index 9a496f4722da..d953a01b8eaa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -6,7 +6,7 @@ #include <linux/mlx5/driver.h> -#define MLX5_DEVCOM_PORTS_SUPPORTED 2 +#define MLX5_DEVCOM_PORTS_SUPPORTED 4 enum mlx5_devcom_components { MLX5_DEVCOM_ESW_OFFLOADS, @@ -30,20 +30,33 @@ void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, int mlx5_devcom_send_event(struct mlx5_devcom *devcom, enum mlx5_devcom_components id, - int event, + int event, int rollback_event, void *event_data); -void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, - bool paired); -bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id); +void mlx5_devcom_comp_set_ready(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool ready); +bool mlx5_devcom_comp_is_ready(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); -void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id); -void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id); -void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, +bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); +void mlx5_devcom_for_each_peer_end(struct mlx5_devcom *devcom, enum mlx5_devcom_components id); +void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, int *i); -#endif +#define mlx5_devcom_for_each_peer_entry(devcom, id, data, i) \ + for (i = 0, data = mlx5_devcom_get_next_peer_data(devcom, id, &i); \ + data; \ + data = mlx5_devcom_get_next_peer_data(devcom, id, &i)) + +void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, int *i); +#define mlx5_devcom_for_each_peer_entry_rcu(devcom, id, data, i) \ + for (i = 0, data = mlx5_devcom_get_next_peer_data_rcu(devcom, id, &i); \ + data; \ + data = mlx5_devcom_get_next_peer_data_rcu(devcom, id, &i)) + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/events.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/events.h new file mode 100644 index 000000000000..a0f7faea317b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/events.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __LIB_EVENTS_H__ +#define __LIB_EVENTS_H__ + +#include "mlx5_core.h" + +#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF +#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF + +enum port_module_event_status_type { + MLX5_MODULE_STATUS_PLUGGED = 0x1, + MLX5_MODULE_STATUS_UNPLUGGED = 0x2, + MLX5_MODULE_STATUS_ERROR = 0x3, + MLX5_MODULE_STATUS_DISABLED = 0x4, + MLX5_MODULE_STATUS_NUM, +}; + +enum port_module_event_error_type { + MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0, + MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1, + MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2, + MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3, + MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4, + MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5, + MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, + MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7, + MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc, + MLX5_MODULE_EVENT_ERROR_NUM, +}; + +struct mlx5_pme_stats { + u64 status_counters[MLX5_MODULE_STATUS_NUM]; + u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; +}; + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index db9df9798ffa..a80ecb672f33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -178,7 +178,7 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, if (!mlx5_chains_ignore_flow_level_supported(chains) || (chain == 0 && prio == 1 && level == 0)) { ft_attr.level = chains->fs_base_level; - ft_attr.prio = chains->fs_base_prio; + ft_attr.prio = chains->fs_base_prio + prio - 1; ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ? mlx5_get_fdb_sub_ns(chains->dev, chain) : mlx5_get_flow_namespace(chains->dev, chains->ns); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index ccf12f7db6f0..2b5826a785c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -45,40 +45,6 @@ int mlx5_crdump_enable(struct mlx5_core_dev *dev); void mlx5_crdump_disable(struct mlx5_core_dev *dev); int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data); -/* TODO move to lib/events.h */ - -#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF -#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF - -enum port_module_event_status_type { - MLX5_MODULE_STATUS_PLUGGED = 0x1, - MLX5_MODULE_STATUS_UNPLUGGED = 0x2, - MLX5_MODULE_STATUS_ERROR = 0x3, - MLX5_MODULE_STATUS_DISABLED = 0x4, - MLX5_MODULE_STATUS_NUM, -}; - -enum port_module_event_error_type { - MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0, - MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1, - MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2, - MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3, - MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4, - MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5, - MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, - MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7, - MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc, - MLX5_MODULE_EVENT_ERROR_NUM, -}; - -struct mlx5_pme_stats { - u64 status_counters[MLX5_MODULE_STATUS_NUM]; - u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; -}; - -void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); -int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); - static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) { return devlink_net(priv_to_devlink(dev)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index 8ff16318e32d..4450091e181a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -99,7 +99,7 @@ int mlx5_mpfs_init(struct mlx5_core_dev *dev) int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); struct mlx5_mpfs *mpfs; - if (!MLX5_ESWITCH_MANAGER(dev)) + if (!MLX5_ESWITCH_MANAGER(dev) || l2table_size == 1) return 0; mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c index 696e45e2bd06..e223e0e46433 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -24,7 +24,8 @@ static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = { [MLX5_TO_TEARDOWN_MS] = 3000, [MLX5_TO_FSM_REACTIVATE_MS] = 5000, [MLX5_TO_RECLAIM_PAGES_MS] = 5000, - [MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000 + [MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000, + [MLX5_TO_RESET_UNLOAD_MS] = 300000 }; static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type) @@ -118,7 +119,8 @@ u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type) #define MLX5_TIMEOUT_FILL(fld, reg_out, dev, to_type, to_extra) \ ({ \ u64 fw_to = MLX5_TIMEOUT_QUERY(fld, reg_out); \ - tout_set(dev, fw_to + (to_extra), to_type); \ + if (fw_to) \ + tout_set(dev, fw_to + (to_extra), to_type); \ fw_to; \ }) @@ -146,6 +148,7 @@ static int tout_query_dtor(struct mlx5_core_dev *dev) MLX5_TIMEOUT_FILL(fsm_reactivate_to, out, dev, MLX5_TO_FSM_REACTIVATE_MS, 0); MLX5_TIMEOUT_FILL(reclaim_pages_to, out, dev, MLX5_TO_RECLAIM_PAGES_MS, 0); MLX5_TIMEOUT_FILL(reclaim_vfs_pages_to, out, dev, MLX5_TO_RECLAIM_VFS_PAGES_MS, 0); + MLX5_TIMEOUT_FILL(reset_unload_to, out, dev, MLX5_TO_RESET_UNLOAD_MS, 0); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h index bc9e9aeda847..99e0a05526fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -26,6 +26,7 @@ enum mlx5_timeouts_types { MLX5_TO_FSM_REACTIVATE_MS, MLX5_TO_RECLAIM_PAGES_MS, MLX5_TO_RECLAIM_VFS_PAGES_MS, + MLX5_TO_RESET_UNLOAD_MS, MAX_TIMEOUT_TYPES }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d6ee016deae1..72ae560a1c68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -619,6 +619,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event)) MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1); + if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_with_driver_unload)) + MLX5_SET(cmd_hca_cap, set_hca_cap, + pci_sync_for_fw_update_with_driver_unload, 1); if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports)) MLX5_SET(cmd_hca_cap, @@ -1118,7 +1121,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_devcom_unregister_device(dev->priv.devcom); } -static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout) +static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeout) { int err; @@ -1183,28 +1186,56 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout goto reclaim_boot_pages; } + return 0; + +reclaim_boot_pages: + mlx5_reclaim_startup_pages(dev); +err_disable_hca: + mlx5_core_disable_hca(dev, 0); +stop_health_poll: + mlx5_stop_health_poll(dev, boot); +err_cmd_cleanup: + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); + mlx5_cmd_cleanup(dev); + + return err; +} + +static void mlx5_function_disable(struct mlx5_core_dev *dev, bool boot) +{ + mlx5_reclaim_startup_pages(dev); + mlx5_core_disable_hca(dev, 0); + mlx5_stop_health_poll(dev, boot); + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); + mlx5_cmd_cleanup(dev); +} + +static int mlx5_function_open(struct mlx5_core_dev *dev) +{ + int err; + err = set_hca_ctrl(dev); if (err) { mlx5_core_err(dev, "set_hca_ctrl failed\n"); - goto reclaim_boot_pages; + return err; } err = set_hca_cap(dev); if (err) { mlx5_core_err(dev, "set_hca_cap failed\n"); - goto reclaim_boot_pages; + return err; } err = mlx5_satisfy_startup_pages(dev, 0); if (err) { mlx5_core_err(dev, "failed to allocate init pages\n"); - goto reclaim_boot_pages; + return err; } err = mlx5_cmd_init_hca(dev, sw_owner_id); if (err) { mlx5_core_err(dev, "init hca failed\n"); - goto reclaim_boot_pages; + return err; } mlx5_set_driver_version(dev); @@ -1212,26 +1243,13 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout err = mlx5_query_hca_caps(dev); if (err) { mlx5_core_err(dev, "query hca failed\n"); - goto reclaim_boot_pages; + return err; } mlx5_start_health_fw_log_up(dev); - return 0; - -reclaim_boot_pages: - mlx5_reclaim_startup_pages(dev); -err_disable_hca: - mlx5_core_disable_hca(dev, 0); -stop_health_poll: - mlx5_stop_health_poll(dev, boot); -err_cmd_cleanup: - mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); - mlx5_cmd_cleanup(dev); - - return err; } -static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) +static int mlx5_function_close(struct mlx5_core_dev *dev) { int err; @@ -1240,15 +1258,33 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n"); return err; } - mlx5_reclaim_startup_pages(dev); - mlx5_core_disable_hca(dev, 0); - mlx5_stop_health_poll(dev, boot); - mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); - mlx5_cmd_cleanup(dev); return 0; } +static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout) +{ + int err; + + err = mlx5_function_enable(dev, boot, timeout); + if (err) + return err; + + err = mlx5_function_open(dev); + if (err) + mlx5_function_disable(dev, boot); + return err; +} + +static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) +{ + int err = mlx5_function_close(dev); + + if (!err) + mlx5_function_disable(dev, boot); + return err; +} + static int mlx5_load(struct mlx5_core_dev *dev) { int err; @@ -1391,12 +1427,11 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_put_uars_page(dev, dev->priv.uar); } -int mlx5_init_one(struct mlx5_core_dev *dev) +int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev) { - struct devlink *devlink = priv_to_devlink(dev); + bool light_probe = mlx5_dev_is_lightweight(dev); int err = 0; - devl_lock(devlink); mutex_lock(&dev->intf_state_mutex); dev->state = MLX5_DEVICE_STATE_UP; @@ -1410,9 +1445,14 @@ int mlx5_init_one(struct mlx5_core_dev *dev) goto function_teardown; } - err = mlx5_devlink_params_register(priv_to_devlink(dev)); - if (err) - goto err_devlink_params_reg; + /* In case of light_probe, mlx5_devlink is already registered. + * Hence, don't register devlink again. + */ + if (!light_probe) { + err = mlx5_devlink_params_register(priv_to_devlink(dev)); + if (err) + goto err_devlink_params_reg; + } err = mlx5_load(dev); if (err) @@ -1425,14 +1465,14 @@ int mlx5_init_one(struct mlx5_core_dev *dev) goto err_register; mutex_unlock(&dev->intf_state_mutex); - devl_unlock(devlink); return 0; err_register: clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); mlx5_unload(dev); err_load: - mlx5_devlink_params_unregister(priv_to_devlink(dev)); + if (!light_probe) + mlx5_devlink_params_unregister(priv_to_devlink(dev)); err_devlink_params_reg: mlx5_cleanup_once(dev); function_teardown: @@ -1440,6 +1480,16 @@ function_teardown: err_function: dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mutex_unlock(&dev->intf_state_mutex); + return err; +} + +int mlx5_init_one(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + int err; + + devl_lock(devlink); + err = mlx5_init_one_devl_locked(dev); devl_unlock(devlink); return err; } @@ -1456,6 +1506,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev) if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "%s: interface is down, NOP\n", __func__); + mlx5_devlink_params_unregister(priv_to_devlink(dev)); mlx5_cleanup_once(dev); goto out; } @@ -1557,6 +1608,100 @@ void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend) devl_unlock(devlink); } +/* In case of light probe, we don't need a full query of hca_caps, but only the bellow caps. + * A full query of hca_caps will be done when the device will reload. + */ +static int mlx5_query_hca_caps_light(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); + if (err) + return err; + + if (MLX5_CAP_GEN(dev, eth_net_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, nic_flow_table) || + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE); + if (err) + return err; + } + + if (MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { + err = mlx5_core_get_caps(dev, MLX5_CAP_VDPA_EMULATION); + if (err) + return err; + } + + return 0; +} + +int mlx5_init_one_light(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + int err; + + dev->state = MLX5_DEVICE_STATE_UP; + err = mlx5_function_enable(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); + if (err) { + mlx5_core_warn(dev, "mlx5_function_enable err=%d\n", err); + goto out; + } + + err = mlx5_query_hca_caps_light(dev); + if (err) { + mlx5_core_warn(dev, "mlx5_query_hca_caps_light err=%d\n", err); + goto query_hca_caps_err; + } + + devl_lock(devlink); + err = mlx5_devlink_params_register(priv_to_devlink(dev)); + devl_unlock(devlink); + if (err) { + mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err); + goto query_hca_caps_err; + } + + return 0; + +query_hca_caps_err: + mlx5_function_disable(dev, true); +out: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + return err; +} + +void mlx5_uninit_one_light(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + + devl_lock(devlink); + mlx5_devlink_params_unregister(priv_to_devlink(dev)); + devl_unlock(devlink); + if (dev->state != MLX5_DEVICE_STATE_UP) + return; + mlx5_function_disable(dev, true); +} + +/* xxx_light() function are used in order to configure the device without full + * init (light init). e.g.: There isn't a point in reload a device to light state. + * Hence, mlx5_load_one_light() isn't needed. + */ + +void mlx5_unload_one_light(struct mlx5_core_dev *dev) +{ + if (dev->state != MLX5_DEVICE_STATE_UP) + return; + mlx5_function_disable(dev, false); + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; +} + static const int types[] = { MLX5_CAP_GENERAL, MLX5_CAP_GENERAL_2, @@ -1809,7 +1954,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_drain_fw_reset(dev); mlx5_drain_health_wq(dev); devlink_unregister(devlink); - mlx5_sriov_disable(pdev); + mlx5_sriov_disable(pdev, false); mlx5_thermal_uninit(dev); mlx5_crdump_disable(dev); mlx5_uninit_one(dev); @@ -1844,7 +1989,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev, false); mlx5_error_sw_reset(dev); - mlx5_unload_one(dev, true); + mlx5_unload_one(dev, false); mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 229520405d4a..682d3dc00dd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -195,7 +195,7 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); int mlx5_sriov_attach(struct mlx5_core_dev *dev); void mlx5_sriov_detach(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); -void mlx5_sriov_disable(struct pci_dev *pdev); +void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change); int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); @@ -240,11 +240,14 @@ int mlx5_attach_device(struct mlx5_core_dev *dev); void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend); int mlx5_register_device(struct mlx5_core_dev *dev); void mlx5_unregister_device(struct mlx5_core_dev *dev); +void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev); +bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev); void mlx5_dev_list_lock(void); void mlx5_dev_list_unlock(void); int mlx5_dev_list_trylock(void); +void mlx5_fw_reporters_create(struct mlx5_core_dev *dev); int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size); int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); @@ -307,16 +310,20 @@ static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev) int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx); void mlx5_mdev_uninit(struct mlx5_core_dev *dev); int mlx5_init_one(struct mlx5_core_dev *dev); +int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev); void mlx5_uninit_one(struct mlx5_core_dev *dev); void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend); void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend); int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); +int mlx5_init_one_light(struct mlx5_core_dev *dev); +void mlx5_uninit_one_light(struct mlx5_core_dev *dev); +void mlx5_unload_one_light(struct mlx5_core_dev *dev); -int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 function_id, +int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod); -#define mlx5_vport_get_other_func_general_cap(dev, fid, out) \ - mlx5_vport_get_other_func_cap(dev, fid, out, MLX5_CAP_GENERAL) +#define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ + mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL) void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work); static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) @@ -331,4 +338,31 @@ bool mlx5_rdma_supported(struct mlx5_core_dev *dev); bool mlx5_vnet_supported(struct mlx5_core_dev *dev); bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev); +static inline u16 mlx5_core_ec_vf_vport_base(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN_2(dev, ec_vf_vport_base); +} + +static inline u16 mlx5_core_ec_sriov_enabled(const struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf(dev) && mlx5_core_ec_vf_vport_base(dev); +} + +static inline bool mlx5_core_is_ec_vf_vport(const struct mlx5_core_dev *dev, u16 vport_num) +{ + int base_vport = mlx5_core_ec_vf_vport_base(dev); + int max_vport = base_vport + mlx5_core_max_ec_vfs(dev); + + if (!mlx5_core_ec_sriov_enabled(dev)) + return false; + + return (vport_num >= base_vport && vport_num < max_vport); +} + +static inline int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vport, bool ec_vf_func) +{ + return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev) + 1 + : vport; +} + #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 95dc67fb3001..dcf58efac159 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -79,7 +79,13 @@ static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_funct if (!func_id) return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF; - return func_id <= mlx5_core_max_vfs(dev) ? MLX5_VF : MLX5_SF; + if (func_id <= max(mlx5_core_max_vfs(dev), mlx5_core_max_ec_vfs(dev))) { + if (ec_function) + return MLX5_EC_VF; + else + return MLX5_VF; + } + return MLX5_SF; } static u32 mlx5_get_ec_function(u32 function) @@ -730,6 +736,9 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) WARN(dev->priv.page_counters[MLX5_HOST_PF], "External host PF FW pages counter is %d after reclaiming all pages\n", dev->priv.page_counters[MLX5_HOST_PF]); + WARN(dev->priv.page_counters[MLX5_EC_VF], + "EC VFs FW pages counter is %d after reclaiming all pages\n", + dev->priv.page_counters[MLX5_EC_VF]); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 98412bd5a696..cba2a4afb5fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -41,6 +41,15 @@ struct mlx5_irq_table { struct mlx5_irq_pool *sf_comp_pool; }; +static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev, + int func, + bool ec_vf_func) +{ + if (!ec_vf_func) + return func; + return mlx5_core_ec_vf_vport_base(dev) + func - 1; +} + /** * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors * to be ssigned to each VF. @@ -79,6 +88,8 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); void *hca_cap = NULL, *query_cap = NULL, *cap; int num_vf_msix, min_msix, max_msix; + bool ec_vf_function; + int vport; int ret; num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); @@ -104,7 +115,9 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, goto out; } - ret = mlx5_vport_get_other_func_general_cap(dev, function_id, query_cap); + ec_vf_function = mlx5_core_ec_sriov_enabled(dev); + vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function); + ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap); if (ret) goto out; @@ -115,6 +128,7 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function); MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id); MLX5_SET(set_hca_cap_in, hca_cap, op_mod, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 540cf05f6373..a42f6cd99b74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -30,9 +30,8 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) struct mlx5_flow_spec *spec; struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; - void *match_criteria; + struct mlx5_eswitch *esw; u32 *flow_group_in; - void *misc; int err; if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) && @@ -63,12 +62,8 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) goto free; } - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, - match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_port); + esw = dev->priv.eswitch; + mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0); fg = mlx5_create_flow_group(ft, flow_group_in); if (IS_ERR(fg)) { @@ -77,14 +72,7 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) goto destroy_flow_table; } - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, - dev->priv.eswitch->manager_vport); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + mlx5_esw_set_spec_source_port(esw, esw->manager_vport, spec); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0); @@ -115,7 +103,7 @@ free: static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev) { - mlx5_core_roce_gid_set(dev, 0, 0, 0, + mlx5_core_roce_gid_set(dev, 0, MLX5_ROCE_VERSION_2, 0, NULL, NULL, false, 0, 1); } @@ -135,7 +123,7 @@ static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) mlx5_rdma_make_default_gid(dev, &gid); return mlx5_core_roce_gid_set(dev, 0, - MLX5_ROCE_VERSION_1, + MLX5_ROCE_VERSION_2, 0, gid.raw, mac, false, 0, 1); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 0692363cf80e..8fe82f1191bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -3,6 +3,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/device.h> +#include <linux/mlx5/eswitch.h> #include "mlx5_core.h" #include "dev.h" #include "devlink.h" @@ -28,6 +29,10 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia mdev->priv.adev_idx = adev->id; sf_dev->mdev = mdev; + /* Only local SFs do light probe */ + if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev)) + mlx5_dev_set_lightweight(mdev); + err = mlx5_mdev_init(mdev, MLX5_SF_PROF); if (err) { mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err); @@ -41,7 +46,10 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia goto remap_err; } - err = mlx5_init_one(mdev); + if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev)) + err = mlx5_init_one_light(mdev); + else + err = mlx5_init_one(mdev); if (err) { mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err); goto init_one_err; @@ -65,7 +73,10 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev) mlx5_drain_health_wq(sf_dev->mdev); devlink_unregister(devlink); - mlx5_uninit_one(sf_dev->mdev); + if (mlx5_dev_is_lightweight(sf_dev->mdev)) + mlx5_uninit_one_light(sf_dev->mdev); + else + mlx5_uninit_one(sf_dev->mdev); iounmap(sf_dev->mdev->iseg); mlx5_mdev_uninit(sf_dev->mdev); mlx5_devlink_free(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 7d955a4d9f14..6a3fa30b2bf2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -28,7 +28,6 @@ struct mlx5_sf_table { struct mutex sf_state_lock; /* Serializes sf state among user cmds & vhca event handler. */ struct notifier_block esw_nb; struct notifier_block vhca_nb; - u8 ecpu: 1; }; static struct mlx5_sf * @@ -283,7 +282,7 @@ out: static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, const struct devlink_port_new_attrs *new_attr, struct netlink_ext_ack *extack, - unsigned int *new_port_index) + struct devlink_port **dl_port) { struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5_sf *sf; @@ -297,7 +296,7 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, new_attr->controller, new_attr->sfnum); if (err) goto esw_err; - *new_port_index = sf->port_index; + *dl_port = &sf->dl_port; trace_mlx5_sf_add(dev, sf->port_index, sf->controller, sf->hw_fn_id, new_attr->sfnum); return 0; @@ -339,7 +338,7 @@ mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_ int mlx5_devlink_sf_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *new_attr, struct netlink_ext_ack *extack, - unsigned int *new_port_index) + struct devlink_port **dl_port) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_sf_table *table; @@ -355,7 +354,7 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink, "Port add is only supported in eswitch switchdev mode or SF ports are disabled."); return -EOPNOTSUPP; } - err = mlx5_sf_add(dev, table, new_attr, extack, new_port_index); + err = mlx5_sf_add(dev, table, new_attr, extack, dl_port); mlx5_sf_table_put(table); return err; } @@ -379,7 +378,8 @@ static void mlx5_sf_dealloc(struct mlx5_sf_table *table, struct mlx5_sf *sf) } } -int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, +int mlx5_devlink_sf_port_del(struct devlink *devlink, + struct devlink_port *dl_port, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); @@ -394,7 +394,7 @@ int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, "Port del is only supported in eswitch switchdev mode or SF ports are disabled."); return -EOPNOTSUPP; } - sf = mlx5_sf_lookup_by_index(table, port_index); + sf = mlx5_sf_lookup_by_index(table, dl_port->index); if (!sf) { err = -ENODEV; goto sf_err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 3a480e06ecc0..860f9ddb7107 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -21,8 +21,9 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev); int mlx5_devlink_sf_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *add_attr, struct netlink_ext_ack *extack, - unsigned int *new_port_index); -int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, + struct devlink_port **dl_port); +int mlx5_devlink_sf_port_del(struct devlink *devlink, + struct devlink_port *dl_port, struct netlink_ext_ack *extack); int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, enum devlink_port_fn_state *state, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 20d7662c10fb..a2fc937d5461 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -37,7 +37,7 @@ #include "mlx5_irq.h" #include "eswitch.h" -static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) +static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf, u16 func_id) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; struct mlx5_hca_vport_context *in; @@ -59,7 +59,7 @@ static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID | !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY; - err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in); + err = mlx5_core_modify_hca_vport_context(dev, 1, 1, func_id, in); if (err) mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf); @@ -73,9 +73,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; int err, vf, num_msix_count; - - if (!MLX5_ESWITCH_MANAGER(dev)) - goto enable_vfs_hca; + int vport_num; err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs); if (err) { @@ -84,7 +82,6 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) return err; } -enable_vfs_hca: num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs); for (vf = 0; vf < num_vfs; vf++) { /* Notify the VF before its enablement to let it set @@ -108,7 +105,10 @@ enable_vfs_hca: sriov->vfs_ctx[vf].enabled = 1; if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { - err = sriov_restore_guids(dev, vf); + vport_num = mlx5_core_ec_sriov_enabled(dev) ? + mlx5_core_ec_vf_vport_base(dev) + vf + : vf + 1; + err = sriov_restore_guids(dev, vf, vport_num); if (err) { mlx5_core_warn(dev, "failed to restore VF %d settings, err %d\n", @@ -123,9 +123,11 @@ enable_vfs_hca: } static void -mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) +mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf, bool num_vf_change) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; + bool wait_for_ec_vf_pages = true; + bool wait_for_vf_pages = true; int err; int vf; @@ -147,11 +149,30 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf); + /* There are a number of scenarios when SRIOV is being disabled: + * 1. VFs or ECVFs had been created, and now set back to 0 (num_vf_change == true). + * - If EC SRIOV is enabled then this flow is happening on the + * embedded platform, wait for only EC VF pages. + * - If EC SRIOV is not enabled this flow is happening on non-embedded + * platform, wait for the VF pages. + * + * 2. The driver is being unloaded. In this case wait for all pages. + */ + if (num_vf_change) { + if (mlx5_core_ec_sriov_enabled(dev)) + wait_for_vf_pages = false; + else + wait_for_ec_vf_pages = false; + } + + if (wait_for_ec_vf_pages && mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_EC_VF])) + mlx5_core_warn(dev, "timeout reclaiming EC VFs pages\n"); + /* For ECPFs, skip waiting for host VF pages until ECPF is destroyed */ if (mlx5_core_is_ecpf(dev)) return; - if (mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF])) + if (wait_for_vf_pages && mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF])) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } @@ -172,12 +193,12 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) err = pci_enable_sriov(pdev, num_vfs); if (err) { mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); - mlx5_device_disable_sriov(dev, num_vfs, true); + mlx5_device_disable_sriov(dev, num_vfs, true, true); } return err; } -void mlx5_sriov_disable(struct pci_dev *pdev) +void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); @@ -185,7 +206,7 @@ void mlx5_sriov_disable(struct pci_dev *pdev) pci_disable_sriov(pdev); devl_lock(devlink); - mlx5_device_disable_sriov(dev, num_vfs, true); + mlx5_device_disable_sriov(dev, num_vfs, true, num_vf_change); devl_unlock(devlink); } @@ -200,7 +221,7 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) if (num_vfs) err = mlx5_sriov_enable(pdev, num_vfs); else - mlx5_sriov_disable(pdev); + mlx5_sriov_disable(pdev, true); if (!err) sriov->num_vfs = num_vfs; @@ -245,7 +266,7 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return; - mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false); + mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false, false); } static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) @@ -264,8 +285,7 @@ static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) host_total_vfs = MLX5_GET(query_esw_functions_out, out, host_params_context.host_total_vfs); kvfree(out); - if (host_total_vfs) - return host_total_vfs; + return host_total_vfs; } done: @@ -284,6 +304,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) total_vfs = pci_sriov_get_totalvfs(pdev); sriov->max_vfs = mlx5_get_max_vfs(dev); sriov->num_vfs = pci_num_vf(pdev); + sriov->max_ec_vfs = mlx5_core_ec_sriov_enabled(dev) ? pci_sriov_get_totalvfs(dev->pdev) : 0; sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 0f783e7906cb..54bb0866ed72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -2077,8 +2077,9 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn, struct mlx5dr_action *action; u8 peer_vport; - peer_vport = vhca_id_valid && (vhca_id != dmn->info.caps.gvmi); - vport_dmn = peer_vport ? dmn->peer_dmn : dmn; + peer_vport = vhca_id_valid && mlx5_core_is_pf(dmn->mdev) && + (vhca_id != dmn->info.caps.gvmi); + vport_dmn = peer_vport ? xa_load(&dmn->peer_dmn_xa, vhca_id) : dmn; if (!vport_dmn) { mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n"); return NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 1aa525e509f1..8c2a34a0d6be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -34,6 +34,7 @@ int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport, u16 vport_number, u16 *gvmi) { + bool ec_vf_func = other_vport ? mlx5_core_is_ec_vf_vport(mdev, vport_number) : false; u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; int out_size; void *out; @@ -46,7 +47,8 @@ int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport, MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, other_function, other_vport); - MLX5_SET(query_hca_cap_in, in, function_id, vport_number); + MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(mdev, vport_number, ec_vf_func)); + MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); MLX5_SET(query_hca_cap_in, in, op_mod, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | HCA_CAP_OPMOD_GET_CUR); @@ -562,11 +564,12 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); if (err) - return err; + goto err_free_in; *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); - kvfree(in); +err_free_in: + kvfree(in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 9a2dfe6ebe31..3d74109f8230 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -475,6 +475,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) mutex_init(&dmn->info.rx.mutex); mutex_init(&dmn->info.tx.mutex); xa_init(&dmn->definers_xa); + xa_init(&dmn->peer_dmn_xa); if (dr_domain_caps_init(mdev, dmn)) { mlx5dr_err(dmn, "Failed init domain, no caps\n"); @@ -507,6 +508,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) uninit_caps: dr_domain_caps_uninit(dmn); def_xa_destroy: + xa_destroy(&dmn->peer_dmn_xa); xa_destroy(&dmn->definers_xa); kfree(dmn); return NULL; @@ -547,6 +549,7 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) dr_domain_uninit_csum_recalc_fts(dmn); dr_domain_uninit_resources(dmn); dr_domain_caps_uninit(dmn); + xa_destroy(&dmn->peer_dmn_xa); xa_destroy(&dmn->definers_xa); mutex_destroy(&dmn->info.tx.mutex); mutex_destroy(&dmn->info.rx.mutex); @@ -555,17 +558,22 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) } void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, - struct mlx5dr_domain *peer_dmn) + struct mlx5dr_domain *peer_dmn, + u16 peer_vhca_id) { + struct mlx5dr_domain *peer; + mlx5dr_domain_lock(dmn); - if (dmn->peer_dmn) - refcount_dec(&dmn->peer_dmn->refcount); + peer = xa_load(&dmn->peer_dmn_xa, peer_vhca_id); + if (peer) + refcount_dec(&peer->refcount); - dmn->peer_dmn = peer_dmn; + WARN_ON(xa_err(xa_store(&dmn->peer_dmn_xa, peer_vhca_id, peer_dmn, GFP_KERNEL))); - if (dmn->peer_dmn) - refcount_inc(&dmn->peer_dmn->refcount); + peer = xa_load(&dmn->peer_dmn_xa, peer_vhca_id); + if (peer) + refcount_inc(&peer->refcount); mlx5dr_domain_unlock(dmn); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c index d6947fe13d56..8ca534ef5d03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c @@ -82,7 +82,7 @@ dr_ptrn_alloc_pattern(struct mlx5dr_ptrn_mgr *mgr, u32 chunk_size; u32 index; - chunk_size = ilog2(num_of_actions); + chunk_size = ilog2(roundup_pow_of_two(num_of_actions)); /* HW modify action index granularity is at least 64B */ chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index 2010d4ac6519..f708b029425a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -1647,21 +1647,23 @@ dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, u8 *tag) { struct mlx5dr_match_misc *misc = &value->misc; + int id = misc->source_eswitch_owner_vhca_id; struct mlx5dr_cmd_vport_cap *vport_cap; struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_domain *vport_dmn; u8 *bit_mask = sb->bit_mask; + struct mlx5dr_domain *peer; bool source_gvmi_set; DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); if (sb->vhca_id_valid) { + peer = xa_load(&dmn->peer_dmn_xa, id); /* Find port GVMI based on the eswitch_owner_vhca_id */ - if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi) + if (id == dmn->info.caps.gvmi) vport_dmn = dmn; - else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id == - dmn->peer_dmn->info.caps.gvmi)) - vport_dmn = dmn->peer_dmn; + else if (peer && (id == peer->info.caps.gvmi)) + vport_dmn = peer; else return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index 4c0704ad166b..dd856cde188d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -1979,20 +1979,22 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, u8 *tag) { struct mlx5dr_match_misc *misc = &value->misc; + int id = misc->source_eswitch_owner_vhca_id; struct mlx5dr_cmd_vport_cap *vport_cap; struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_domain *vport_dmn; u8 *bit_mask = sb->bit_mask; + struct mlx5dr_domain *peer; DR_STE_SET_TAG(src_gvmi_qp_v1, tag, source_qp, misc, source_sqn); if (sb->vhca_id_valid) { + peer = xa_load(&dmn->peer_dmn_xa, id); /* Find port GVMI based on the eswitch_owner_vhca_id */ - if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi) + if (id == dmn->info.caps.gvmi) vport_dmn = dmn; - else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id == - dmn->peer_dmn->info.caps.gvmi)) - vport_dmn = dmn->peer_dmn; + else if (peer && (id == peer->info.caps.gvmi)) + vport_dmn = peer; else return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 678a993ab053..6c59de3e28f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -935,7 +935,6 @@ struct mlx5dr_domain_info { }; struct mlx5dr_domain { - struct mlx5dr_domain *peer_dmn; struct mlx5_core_dev *mdev; u32 pdn; struct mlx5_uars_page *uar; @@ -956,6 +955,7 @@ struct mlx5dr_domain { struct list_head dbg_tbl_list; struct mlx5dr_dbg_dump_info dump_info; struct xarray definers_xa; + struct xarray peer_dmn_xa; /* memory management statistics */ u32 num_buddies[DR_ICM_TYPE_MAX]; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index cc215beb7436..feb307fb3440 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -780,14 +780,15 @@ restore_fte: } static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_root_namespace *peer_ns) + struct mlx5_flow_root_namespace *peer_ns, + u16 peer_vhca_id) { struct mlx5dr_domain *peer_domain = NULL; if (peer_ns) peer_domain = peer_ns->fs_dr_domain.dr_domain; mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain, - peer_domain); + peer_domain, peer_vhca_id); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index d1c04f43d86d..89fced86936f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -48,7 +48,8 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *domain); int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags); void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, - struct mlx5dr_domain *peer_dmn); + struct mlx5dr_domain *peer_dmn, + u16 peer_vhca_id); struct mlx5dr_table * mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c index e47fa6fb836f..52199d39657e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c @@ -45,7 +45,7 @@ static int mlx5_thermal_get_mtmp_temp(struct mlx5_core_dev *mdev, u32 id, int *p static int mlx5_thermal_get_temp(struct thermal_zone_device *tzdev, int *p_temp) { - struct mlx5_thermal *thermal = tzdev->devdata; + struct mlx5_thermal *thermal = thermal_zone_device_priv(tzdev); struct mlx5_core_dev *mdev = thermal->mdev; int err; @@ -68,30 +68,36 @@ static struct thermal_zone_device_ops mlx5_thermal_ops = { int mlx5_thermal_init(struct mlx5_core_dev *mdev) { + char data[THERMAL_NAME_LENGTH]; struct mlx5_thermal *thermal; - struct thermal_zone_device *tzd; - const char *data = "mlx5"; + int err; - tzd = thermal_zone_get_zone_by_name(data); - if (!IS_ERR(tzd)) + if (!mlx5_core_is_pf(mdev) && !mlx5_core_is_ecpf(mdev)) return 0; + err = snprintf(data, sizeof(data), "mlx5_%s", dev_name(mdev->device)); + if (err < 0 || err >= sizeof(data)) { + mlx5_core_err(mdev, "Failed to setup thermal zone name, %d\n", err); + return -EINVAL; + } + thermal = kzalloc(sizeof(*thermal), GFP_KERNEL); if (!thermal) return -ENOMEM; thermal->mdev = mdev; - thermal->tzdev = thermal_zone_device_register(data, - MLX5_THERMAL_NUM_TRIPS, - MLX5_THERMAL_TRIP_MASK, - thermal, - &mlx5_thermal_ops, - NULL, 0, MLX5_THERMAL_POLL_INT_MSEC); + thermal->tzdev = thermal_zone_device_register_with_trips(data, + NULL, + MLX5_THERMAL_NUM_TRIPS, + MLX5_THERMAL_TRIP_MASK, + thermal, + &mlx5_thermal_ops, + NULL, 0, MLX5_THERMAL_POLL_INT_MSEC); if (IS_ERR(thermal->tzdev)) { - dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n", - data, PTR_ERR(thermal->tzdev)); + err = PTR_ERR(thermal->tzdev); + mlx5_core_err(mdev, "Failed to register thermal zone device (%s) %d\n", data, err); kfree(thermal); - return -EINVAL; + return err; } mdev->thermal = thermal; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index ba7e3df22413..5a31fb47ffa5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -288,7 +288,8 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); - MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + if (vport || mlx5_core_is_ecpf(dev)) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); if (err) @@ -1160,23 +1161,26 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); -int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out, +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, u16 opmod) { + bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {}; opmod = (opmod << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, op_mod, opmod); - MLX5_SET(query_hca_cap_in, in, function_id, function_id); + MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(dev, vport, ec_vf_func)); MLX5_SET(query_hca_cap_in, in, other_function, true); + MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); } EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap); int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, - u16 function_id, u16 opmod) + u16 vport, u16 opmod) { + bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); void *set_hca_cap; void *set_ctx; @@ -1190,8 +1194,10 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap MLX5_SET(set_hca_cap_in, set_ctx, op_mod, opmod << 1); set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); memcpy(set_hca_cap, hca_cap, MLX5_ST_SZ_BYTES(cmd_hca_cap)); - MLX5_SET(set_hca_cap_in, set_ctx, function_id, function_id); + MLX5_SET(set_hca_cap_in, set_ctx, function_id, + mlx5_vport_to_func_id(dev, vport, ec_vf_func)); MLX5_SET(set_hca_cap_in, set_ctx, other_function, true); + MLX5_SET(set_hca_cap_in, set_ctx, ec_vf_function, ec_vf_func); ret = mlx5_cmd_exec_in(dev, set_hca_cap, set_ctx); kfree(set_ctx); diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h index b001e5258091..47f6cc0401c3 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h @@ -44,7 +44,7 @@ MLXFW_MFA2_TLV(multi, struct mlxfw_mfa2_tlv_multi, MLXFW_MFA2_TLV_MULTI_PART); struct mlxfw_mfa2_tlv_psid { - u8 psid[0]; + DECLARE_FLEX_ARRAY(u8, psid); } __packed; MLXFW_MFA2_TLV_VARSIZE(psid, struct mlxfw_mfa2_tlv_psid, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 22db0bb15c45..1ccf3b73ed72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1723,8 +1723,6 @@ static const struct devlink_ops mlxsw_devlink_ops = { BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), .reload_down = mlxsw_devlink_core_bus_device_reload_down, .reload_up = mlxsw_devlink_core_bus_device_reload_up, - .port_split = mlxsw_devlink_port_split, - .port_unsplit = mlxsw_devlink_port_unsplit, .sb_pool_get = mlxsw_devlink_sb_pool_get, .sb_pool_set = mlxsw_devlink_sb_pool_set, .sb_port_pool_get = mlxsw_devlink_sb_port_pool_get, @@ -3116,6 +3114,11 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_res_get); +static const struct devlink_port_ops mlxsw_devlink_port_ops = { + .port_split = mlxsw_devlink_port_split, + .port_unsplit = mlxsw_devlink_port_unsplit, +}; + static int __mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u16 local_port, enum devlink_port_flavour flavour, u8 slot_index, u32 port_number, bool split, @@ -3150,7 +3153,8 @@ static int __mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u16 local_port, devlink_port_linecard_set(devlink_port, linecard->devlink_linecard); } - err = devl_port_register(devlink, devlink_port, local_port); + err = devl_port_register_with_ops(devlink, devlink_port, local_port, + &mlxsw_devlink_port_ops); if (err) memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port)); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c index bd1a51a0a540..973de2adc943 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c @@ -32,8 +32,8 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8), MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2), MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6), - MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 3), - MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 20, 8), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 4), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 21, 8), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4), @@ -42,6 +42,7 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_64_95, 0x34, 4), MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 0x38, 4), MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4), + MLXSW_AFK_ELEMENT_INFO_U32(FDB_MISS, 0x40, 0, 1), }; struct mlxsw_afk { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index 3a037fe47211..65a4abadc7db 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -35,6 +35,7 @@ enum mlxsw_afk_element { MLXSW_AFK_ELEMENT_IP_DSCP, MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB, MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB, + MLXSW_AFK_ELEMENT_FDB_MISS, MLXSW_AFK_ELEMENT_MAX, }; @@ -69,7 +70,7 @@ struct mlxsw_afk_element_info { MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_BUF, \ _element, _offset, 0, _size) -#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x40 +#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x44 struct mlxsw_afk_element_inst { /* element instance in actual block */ enum mlxsw_afk_element element; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 70735068cf29..0fd290d776ff 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -405,7 +405,8 @@ mlxsw_hwmon_module_temp_label_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); return sprintf(buf, "front panel %03u\n", - mlxsw_hwmon_attr->type_index); + mlxsw_hwmon_attr->type_index + 1 - + mlxsw_hwmon_attr->mlxsw_hwmon_dev->sensor_count); } static ssize_t diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 2c586c2308ae..d23f293e285c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -48,6 +48,7 @@ #define MLXSW_I2C_MBOX_SIZE_BITS 12 #define MLXSW_I2C_ADDR_BUF_SIZE 4 #define MLXSW_I2C_BLK_DEF 32 +#define MLXSW_I2C_BLK_MAX 100 #define MLXSW_I2C_RETRY 5 #define MLXSW_I2C_TIMEOUT_MSECS 5000 #define MLXSW_I2C_MAX_DATA_SIZE 256 @@ -444,7 +445,7 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, } else { /* No input mailbox is case of initialization query command. */ reg_size = MLXSW_I2C_MAX_DATA_SIZE; - num = reg_size / mlxsw_i2c->block_size; + num = DIV_ROUND_UP(reg_size, mlxsw_i2c->block_size); if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { dev_err(&client->dev, "Could not acquire lock"); @@ -653,7 +654,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client) return -EOPNOTSUPP; } - mlxsw_i2c->block_size = max_t(u16, MLXSW_I2C_BLK_DEF, + mlxsw_i2c->block_size = min_t(u16, MLXSW_I2C_BLK_MAX, min_t(u16, quirks->max_read_len, quirks->max_write_len)); } else { @@ -751,7 +752,7 @@ static void mlxsw_i2c_remove(struct i2c_client *client) int mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver) { - i2c_driver->probe_new = mlxsw_i2c_probe; + i2c_driver->probe = mlxsw_i2c_probe; i2c_driver->remove = mlxsw_i2c_remove; return i2c_add_driver(i2c_driver); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 6b56eadd736e..6b98c3287b49 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -417,6 +417,7 @@ static int mlxsw_m_linecards_init(struct mlxsw_m *mlxsw_m) err_kmalloc_array: for (i--; i >= 0; i--) kfree(mlxsw_m->line_cards[i]); + kfree(mlxsw_m->line_cards); err_kcalloc: kfree(mlxsw_m->ports); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index c968309657dd..51eea1f0529c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -517,11 +517,15 @@ static void mlxsw_pci_skb_cb_ts_set(struct mlxsw_pci *mlxsw_pci, struct sk_buff *skb, enum mlxsw_pci_cqe_v cqe_v, char *cqe) { + u8 ts_type; + if (cqe_v != MLXSW_PCI_CQE_V2) return; - if (mlxsw_pci_cqe2_time_stamp_type_get(cqe) != - MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC) + ts_type = mlxsw_pci_cqe2_time_stamp_type_get(cqe); + + if (ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC && + ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_MIRROR_UTC) return; mlxsw_skb_cb(skb)->cqe_ts.sec = mlxsw_pci_cqe2_time_stamp_sec_get(cqe); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 8165bf31a99a..17160e867bef 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -97,14 +97,6 @@ MLXSW_ITEM32(reg, sspr, m, 0x00, 31, 1); */ MLXSW_ITEM32_LP(reg, sspr, 0x00, 16, 0x00, 12); -/* reg_sspr_sub_port - * Virtual port within the physical port. - * Should be set to 0 when virtual ports are not enabled on the port. - * - * Access: RW - */ -MLXSW_ITEM32(reg, sspr, sub_port, 0x00, 8, 8); - /* reg_sspr_system_port * Unique identifier within the stacking domain that represents all the ports * that are available in the system (external ports). @@ -120,7 +112,6 @@ static inline void mlxsw_reg_sspr_pack(char *payload, u16 local_port) MLXSW_REG_ZERO(sspr, payload); mlxsw_reg_sspr_m_set(payload, 1); mlxsw_reg_sspr_local_port_set(payload, local_port); - mlxsw_reg_sspr_sub_port_set(payload, 0); mlxsw_reg_sspr_system_port_set(payload, local_port); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 02a327744a61..25a01dafde1b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4337,8 +4337,8 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port->default_vlan); /* Join a router interface configured on the LAG, if exists */ - err = mlxsw_sp_port_vlan_router_join(mlxsw_sp_port->default_vlan, - lag_dev, extack); + err = mlxsw_sp_router_port_join_lag(mlxsw_sp_port, lag_dev, + extack); if (err) goto err_router_join; @@ -5139,14 +5139,6 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, return notifier_from_errno(err); } -static struct notifier_block mlxsw_sp_inetaddr_valid_nb __read_mostly = { - .notifier_call = mlxsw_sp_inetaddr_valid_event, -}; - -static struct notifier_block mlxsw_sp_inet6addr_valid_nb __read_mostly = { - .notifier_call = mlxsw_sp_inet6addr_valid_event, -}; - static const struct pci_device_id mlxsw_sp1_pci_id_table[] = { {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0}, {0, }, @@ -5191,12 +5183,9 @@ static int __init mlxsw_sp_module_init(void) { int err; - register_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); - register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); - err = mlxsw_core_driver_register(&mlxsw_sp1_driver); if (err) - goto err_sp1_core_driver_register; + return err; err = mlxsw_core_driver_register(&mlxsw_sp2_driver); if (err) @@ -5242,9 +5231,6 @@ err_sp3_core_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp2_driver); err_sp2_core_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp1_driver); -err_sp1_core_driver_register: - unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); - unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); return err; } @@ -5258,8 +5244,6 @@ static void __exit mlxsw_sp_module_exit(void) mlxsw_core_driver_unregister(&mlxsw_sp3_driver); mlxsw_core_driver_unregister(&mlxsw_sp2_driver); mlxsw_core_driver_unregister(&mlxsw_sp1_driver); - unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); - unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); } module_init(mlxsw_sp_module_init); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4c22f8004514..231e364cbb7c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -755,14 +755,6 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, const struct net_device *macvlan_dev); -int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, - unsigned long event, void *ptr); -int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, - unsigned long event, void *ptr); -int -mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, - struct net_device *l3_dev, - struct netlink_ext_ack *extack); void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c index e4f4cded2b6f..b1178b7a7f51 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c @@ -193,7 +193,7 @@ mlxsw_sp2_mr_tcam_rule_parse(struct mlxsw_sp_acl_rule *rule, key->vrid, GENMASK(7, 0)); mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB, - key->vrid >> 8, GENMASK(2, 0)); + key->vrid >> 8, GENMASK(3, 0)); switch (key->proto) { case MLXSW_SP_L3_PROTO_IPV4: return mlxsw_sp2_mr_tcam_rule_parse4(rulei, key); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c index 00c32320f891..ae2d6f12b799 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c @@ -123,10 +123,12 @@ const struct mlxsw_afk_ops mlxsw_sp1_afk_ops = { }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_0[] = { + MLXSW_AFK_ELEMENT_INST_U32(FDB_MISS, 0x00, 3, 1), MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x04, 4), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_1[] = { + MLXSW_AFK_ELEMENT_INST_U32(FDB_MISS, 0x00, 3, 1), MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x04, 4), }; @@ -169,7 +171,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_2[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4[] = { MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 24, 8), - MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x00, 0, 3), + MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x00, 0, 3, 0, true), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_0[] = { @@ -319,7 +321,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5b[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4b[] = { MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 13, 8), - MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x04, 21, 4, 0, true), + MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x04, 21, 4), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_2b[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 5416093c0e35..c8a356accdf8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -221,7 +221,7 @@ start_again: for (; i < rif_count; i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); - if (!rif || !mlxsw_sp_rif_dev(rif)) + if (!rif || !mlxsw_sp_rif_has_dev(rif)) continue; err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, rif, counters_enabled); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 594cdcb90b3d..72917f09e806 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -281,39 +281,38 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei, - struct flow_cls_offload *f, - struct mlxsw_sp_flow_block *block) +static int +mlxsw_sp_flower_parse_meta_iif(struct mlxsw_sp_acl_rule_info *rulei, + const struct mlxsw_sp_flow_block *block, + const struct flow_match_meta *match, + struct netlink_ext_ack *extack) { - struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *ingress_dev; - struct flow_match_meta match; - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) + if (!match->mask->ingress_ifindex) return 0; - flow_rule_match_meta(rule, &match); - if (match.mask->ingress_ifindex != 0xFFFFFFFF) { - NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask"); + if (match->mask->ingress_ifindex != 0xFFFFFFFF) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); return -EINVAL; } ingress_dev = __dev_get_by_index(block->net, - match.key->ingress_ifindex); + match->key->ingress_ifindex); if (!ingress_dev) { - NL_SET_ERR_MSG_MOD(f->common.extack, "Can't find specified ingress port to match on"); + NL_SET_ERR_MSG_MOD(extack, "Can't find specified ingress port to match on"); return -EINVAL; } if (!mlxsw_sp_port_dev_check(ingress_dev)) { - NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on non-mlxsw ingress port"); + NL_SET_ERR_MSG_MOD(extack, "Can't match on non-mlxsw ingress port"); return -EINVAL; } mlxsw_sp_port = netdev_priv(ingress_dev); if (mlxsw_sp_port->mlxsw_sp != block->mlxsw_sp) { - NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on a port from different device"); + NL_SET_ERR_MSG_MOD(extack, "Can't match on a port from different device"); return -EINVAL; } @@ -321,9 +320,29 @@ static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei, MLXSW_AFK_ELEMENT_SRC_SYS_PORT, mlxsw_sp_port->local_port, 0xFFFFFFFF); + return 0; } +static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei, + struct flow_cls_offload *f, + struct mlxsw_sp_flow_block *block) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_match_meta match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) + return 0; + + flow_rule_match_meta(rule, &match); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_FDB_MISS, + match.key->l2_miss, match.mask->l2_miss); + + return mlxsw_sp_flower_parse_meta_iif(rulei, block, &match, + f->common.extack); +} + static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei, struct flow_cls_offload *f) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 1f6bc0c7e91d..69cd689dbc83 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -704,12 +704,12 @@ void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index) static struct mlxsw_sp_mr_vif * mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table, - const struct net_device *dev) + const struct mlxsw_sp_rif *rif) { vifi_t vif_index; for (vif_index = 0; vif_index < MAXVIFS; vif_index++) - if (mr_table->vifs[vif_index].dev == dev) + if (mlxsw_sp_rif_dev_is(rif, mr_table->vifs[vif_index].dev)) return &mr_table->vifs[vif_index]; return NULL; } @@ -717,13 +717,12 @@ mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table, int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, const struct mlxsw_sp_rif *rif) { - const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp_mr_vif *mr_vif; - if (!rif_dev) + if (!mlxsw_sp_rif_has_dev(rif)) return 0; - mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif); if (!mr_vif) return 0; return mlxsw_sp_mr_vif_resolve(mr_table, mr_vif->dev, mr_vif, @@ -733,13 +732,12 @@ int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, const struct mlxsw_sp_rif *rif) { - const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp_mr_vif *mr_vif; - if (!rif_dev) + if (!mlxsw_sp_rif_has_dev(rif)) return; - mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif); if (!mr_vif) return; mlxsw_sp_mr_vif_unresolve(mr_table, mr_vif->dev, mr_vif); @@ -748,17 +746,16 @@ void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, const struct mlxsw_sp_rif *rif, int mtu) { - const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; struct mlxsw_sp_mr_route_vif_entry *rve; struct mlxsw_sp_mr *mr = mlxsw_sp->mr; struct mlxsw_sp_mr_vif *mr_vif; - if (!rif_dev) + if (!mlxsw_sp_rif_has_dev(rif)) return; /* Search for a VIF that use that RIF */ - mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif); if (!mr_vif) return; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c index d309b77a0194..bb8eeb86edf7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c @@ -11,10 +11,12 @@ #include "spectrum_nve.h" #define MLXSW_SP_NVE_VXLAN_IPV4_SUPPORTED_FLAGS (VXLAN_F_UDP_ZERO_CSUM_TX | \ - VXLAN_F_LEARN) + VXLAN_F_LEARN | \ + VXLAN_F_LOCALBYPASS) #define MLXSW_SP_NVE_VXLAN_IPV6_SUPPORTED_FLAGS (VXLAN_F_IPV6 | \ VXLAN_F_UDP_ZERO_CSUM6_TX | \ - VXLAN_F_UDP_ZERO_CSUM6_RX) + VXLAN_F_UDP_ZERO_CSUM6_RX | \ + VXLAN_F_LOCALBYPASS) static bool mlxsw_sp_nve_vxlan_ipv4_flags_check(const struct vxlan_config *cfg, struct netlink_ext_ack *extack) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4a73e2fe95ef..b32adf277a22 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -51,10 +51,27 @@ struct mlxsw_sp_vr; struct mlxsw_sp_lpm_tree; struct mlxsw_sp_rif_ops; -struct mlxsw_sp_rif { +struct mlxsw_sp_crif_key { + struct net_device *dev; +}; + +struct mlxsw_sp_crif { + struct mlxsw_sp_crif_key key; + struct rhash_head ht_node; + bool can_destroy; struct list_head nexthop_list; + struct mlxsw_sp_rif *rif; +}; + +static const struct rhashtable_params mlxsw_sp_crif_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_crif, key), + .key_len = sizeof_field(struct mlxsw_sp_crif, key), + .head_offset = offsetof(struct mlxsw_sp_crif, ht_node), +}; + +struct mlxsw_sp_rif { + struct mlxsw_sp_crif *crif; /* NULL for underlay RIF */ struct list_head neigh_list; - struct net_device *dev; /* NULL for underlay RIF */ struct mlxsw_sp_fid *fid; unsigned char addr[ETH_ALEN]; int mtu; @@ -71,6 +88,13 @@ struct mlxsw_sp_rif { bool counter_egress_valid; }; +static struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) +{ + if (!rif->crif) + return NULL; + return rif->crif->key.dev; +} + struct mlxsw_sp_rif_params { struct net_device *dev; union { @@ -96,8 +120,8 @@ struct mlxsw_sp_rif_subport { struct mlxsw_sp_rif_ipip_lb { struct mlxsw_sp_rif common; struct mlxsw_sp_rif_ipip_lb_config lb_config; - u16 ul_vr_id; /* Reserved for Spectrum-2. */ - u16 ul_rif_id; /* Reserved for Spectrum. */ + u16 ul_vr_id; /* Spectrum-1. */ + u16 ul_rif_id; /* Spectrum-2+. */ }; struct mlxsw_sp_rif_params_ipip_lb { @@ -748,10 +772,11 @@ static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) { + int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); struct mlxsw_sp_vr *vr; int i; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + for (i = 0; i < max_vrs; i++) { vr = &mlxsw_sp->router->vrs[i]; if (!mlxsw_sp_vr_is_used(vr)) return vr; @@ -792,12 +817,13 @@ static u32 mlxsw_sp_fix_tb_id(u32 tb_id) static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id) { + int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); struct mlxsw_sp_vr *vr; int i; tb_id = mlxsw_sp_fix_tb_id(tb_id); - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + for (i = 0; i < max_vrs; i++) { vr = &mlxsw_sp->router->vrs[i]; if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id) return vr; @@ -959,6 +985,7 @@ static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib, struct mlxsw_sp_lpm_tree *new_tree) { + int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); enum mlxsw_sp_l3proto proto = fib->proto; struct mlxsw_sp_lpm_tree *old_tree; u8 old_id, new_id = new_tree->id; @@ -968,7 +995,7 @@ static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, old_tree = mlxsw_sp->router->lpm.proto_trees[proto]; old_id = old_tree->id; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + for (i = 0; i < max_vrs; i++) { vr = &mlxsw_sp->router->vrs[i]; if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id)) continue; @@ -1052,6 +1079,61 @@ u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) return tb_id; } +static void +mlxsw_sp_crif_init(struct mlxsw_sp_crif *crif, struct net_device *dev) +{ + crif->key.dev = dev; + INIT_LIST_HEAD(&crif->nexthop_list); +} + +static struct mlxsw_sp_crif * +mlxsw_sp_crif_alloc(struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + + crif = kzalloc(sizeof(*crif), GFP_KERNEL); + if (!crif) + return NULL; + + mlxsw_sp_crif_init(crif, dev); + return crif; +} + +static void mlxsw_sp_crif_free(struct mlxsw_sp_crif *crif) +{ + if (WARN_ON(crif->rif)) + return; + + WARN_ON(!list_empty(&crif->nexthop_list)); + kfree(crif); +} + +static int mlxsw_sp_crif_insert(struct mlxsw_sp_router *router, + struct mlxsw_sp_crif *crif) +{ + return rhashtable_insert_fast(&router->crif_ht, &crif->ht_node, + mlxsw_sp_crif_ht_params); +} + +static void mlxsw_sp_crif_remove(struct mlxsw_sp_router *router, + struct mlxsw_sp_crif *crif) +{ + rhashtable_remove_fast(&router->crif_ht, &crif->ht_node, + mlxsw_sp_crif_ht_params); +} + +static struct mlxsw_sp_crif * +mlxsw_sp_crif_lookup(struct mlxsw_sp_router *router, + const struct net_device *dev) +{ + struct mlxsw_sp_crif_key key = { + .dev = (struct net_device *)dev, + }; + + return rhashtable_lookup_fast(&router->crif_ht, &key, + mlxsw_sp_crif_ht_params); +} + static struct mlxsw_sp_rif * mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params, @@ -1557,6 +1639,7 @@ mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id, u16 ul_rif_id, bool enable) { struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config; + struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common); enum mlxsw_reg_ritr_loopback_ipip_options ipip_options; struct mlxsw_sp_rif *rif = &lb_rif->common; struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; @@ -1569,7 +1652,7 @@ mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id, case MLXSW_SP_L3_PROTO_IPV4: saddr4 = be32_to_cpu(lb_cf.saddr.addr4); mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, - rif->rif_index, rif->vr_id, rif->dev->mtu); + rif->rif_index, rif->vr_id, dev->mtu); mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt, ipip_options, ul_vr_id, ul_rif_id, saddr4, @@ -1579,7 +1662,7 @@ mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id, case MLXSW_SP_L3_PROTO_IPV6: saddr6 = &lb_cf.saddr.addr6; mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, - rif->rif_index, rif->vr_id, rif->dev->mtu); + rif->rif_index, rif->vr_id, dev->mtu); mlxsw_reg_ritr_loopback_ipip6_pack(ritr_pl, lb_cf.lb_ipipt, ipip_options, ul_vr_id, ul_rif_id, saddr6, @@ -1639,9 +1722,29 @@ static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); } -static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif); + +static void mlxsw_sp_rif_migrate_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *old_rif, - struct mlxsw_sp_rif *new_rif); + struct mlxsw_sp_rif *new_rif, + bool migrate_nhs) +{ + struct mlxsw_sp_crif *crif = old_rif->crif; + struct mlxsw_sp_crif mock_crif = {}; + + if (migrate_nhs) + mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); + + /* Plant a mock CRIF so that destroying the old RIF doesn't unoffload + * our nexthops and IPIP tunnels, and doesn't sever the crif->rif link. + */ + mlxsw_sp_crif_init(&mock_crif, crif->key.dev); + old_rif->crif = &mock_crif; + mock_crif.rif = old_rif; + mlxsw_sp_rif_destroy(old_rif); +} + static int mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, @@ -1659,18 +1762,11 @@ mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(new_lb_rif); ipip_entry->ol_lb = new_lb_rif; - if (keep_encap) - mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common, - &new_lb_rif->common); - - mlxsw_sp_rif_destroy(&old_lb_rif->common); - + mlxsw_sp_rif_migrate_destroy(mlxsw_sp, &old_lb_rif->common, + &new_lb_rif->common, keep_encap); return 0; } -static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif); - /** * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry. * @mlxsw_sp: mlxsw_sp. @@ -2329,7 +2425,7 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, } dipn = htonl(dip); - dev = mlxsw_sp->router->rifs[rif]->dev; + dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]); n = neigh_lookup(&arp_tbl, &dipn, dev); if (!n) return; @@ -2357,7 +2453,7 @@ static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, return; } - dev = mlxsw_sp->router->rifs[rif]->dev; + dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]); n = neigh_lookup(&nd_tbl, &dip, dev); if (!n) return; @@ -2745,13 +2841,12 @@ static void mlxsw_sp_router_update_priority_work(struct work_struct *work) } static int mlxsw_sp_router_schedule_work(struct net *net, - struct notifier_block *nb, + struct mlxsw_sp_router *router, + struct neighbour *n, void (*cb)(struct work_struct *)) { struct mlxsw_sp_netevent_work *net_work; - struct mlxsw_sp_router *router; - router = container_of(nb, struct mlxsw_sp_router, netevent_nb); if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp))) return NOTIFY_DONE; @@ -2761,19 +2856,31 @@ static int mlxsw_sp_router_schedule_work(struct net *net, INIT_WORK(&net_work->work, cb); net_work->mlxsw_sp = router->mlxsw_sp; + net_work->n = n; mlxsw_core_schedule_work(&net_work->work); return NOTIFY_DONE; } +static bool mlxsw_sp_dev_lower_is_port(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + rcu_read_lock(); + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev); + rcu_read_unlock(); + return !!mlxsw_sp_port; +} + static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, unsigned long event, void *ptr) { - struct mlxsw_sp_netevent_work *net_work; - struct mlxsw_sp_port *mlxsw_sp_port; - struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_router *router; unsigned long interval; struct neigh_parms *p; struct neighbour *n; + struct net *net; + + router = container_of(nb, struct mlxsw_sp_router, netevent_nb); switch (event) { case NETEVENT_DELAY_PROBE_TIME_UPDATE: @@ -2787,51 +2894,37 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, /* We are in atomic context and can't take RTNL mutex, * so use RCU variant to walk the device chain. */ - mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev); - if (!mlxsw_sp_port) + if (!mlxsw_sp_dev_lower_is_port(p->dev)) return NOTIFY_DONE; - mlxsw_sp = mlxsw_sp_port->mlxsw_sp; interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME)); - mlxsw_sp->router->neighs_update.interval = interval; - - mlxsw_sp_port_dev_put(mlxsw_sp_port); + router->neighs_update.interval = interval; break; case NETEVENT_NEIGH_UPDATE: n = ptr; + net = neigh_parms_net(n->parms); if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6) return NOTIFY_DONE; - mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev); - if (!mlxsw_sp_port) + if (!mlxsw_sp_dev_lower_is_port(n->dev)) return NOTIFY_DONE; - net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); - if (!net_work) { - mlxsw_sp_port_dev_put(mlxsw_sp_port); - return NOTIFY_BAD; - } - - INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work); - net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - net_work->n = n; - /* Take a reference to ensure the neighbour won't be * destructed until we drop the reference in delayed * work. */ neigh_clone(n); - mlxsw_core_schedule_work(&net_work->work); - mlxsw_sp_port_dev_put(mlxsw_sp_port); - break; + return mlxsw_sp_router_schedule_work(net, router, n, + mlxsw_sp_router_neigh_event_work); + case NETEVENT_IPV4_MPATH_HASH_UPDATE: case NETEVENT_IPV6_MPATH_HASH_UPDATE: - return mlxsw_sp_router_schedule_work(ptr, nb, + return mlxsw_sp_router_schedule_work(ptr, router, NULL, mlxsw_sp_router_mp_hash_event_work); case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE: - return mlxsw_sp_router_schedule_work(ptr, nb, + return mlxsw_sp_router_schedule_work(ptr, router, NULL, mlxsw_sp_router_update_priority_work); } @@ -2902,7 +2995,7 @@ struct mlxsw_sp_nexthop_key { struct mlxsw_sp_nexthop { struct list_head neigh_list_node; /* member of neigh entry list */ - struct list_head rif_list_node; + struct list_head crif_list_node; struct list_head router_list_node; struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group * this nexthop belongs to @@ -2915,7 +3008,7 @@ struct mlxsw_sp_nexthop { int nh_weight; int norm_nh_weight; int num_adj_entries; - struct mlxsw_sp_rif *rif; + struct mlxsw_sp_crif *crif; u8 should_offload:1, /* set indicates this nexthop should be written * to the adjacency table. */ @@ -2935,6 +3028,14 @@ struct mlxsw_sp_nexthop { bool counter_valid; }; +static struct net_device * +mlxsw_sp_nexthop_dev(const struct mlxsw_sp_nexthop *nh) +{ + if (!nh->crif) + return NULL; + return nh->crif->key.dev; +} + enum mlxsw_sp_nexthop_group_type { MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4, MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6, @@ -2952,9 +3053,18 @@ struct mlxsw_sp_nexthop_group_info { is_resilient:1; struct list_head list; /* member in nh_res_grp_list */ struct mlxsw_sp_nexthop nexthops[]; -#define nh_rif nexthops[0].rif }; +static struct mlxsw_sp_rif * +mlxsw_sp_nhgi_rif(const struct mlxsw_sp_nexthop_group_info *nhgi) +{ + struct mlxsw_sp_crif *crif = nhgi->nexthops[0].crif; + + if (!crif) + return NULL; + return crif->rif; +} + struct mlxsw_sp_nexthop_group_vr_key { u16 vr_id; enum mlxsw_sp_l3proto proto; @@ -3076,7 +3186,9 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh) { - return nh->rif; + if (WARN_ON(!nh->crif)) + return NULL; + return nh->crif->rif; } bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) @@ -3461,11 +3573,12 @@ static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, bool force, char *ratr_pl) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh); enum mlxsw_reg_ratr_op op; u16 rif_index; - rif_index = nh->rif ? nh->rif->rif_index : - mlxsw_sp->router->lb_rif_index; + rif_index = rif ? rif->rif_index : + mlxsw_sp->router->lb_crif->rif->rif_index; op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET, @@ -4008,16 +4121,18 @@ mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, { struct neighbour *n, *old_n = neigh_entry->key.n; struct mlxsw_sp_nexthop *nh; + struct net_device *dev; bool entry_connected; u8 nud_state, dead; int err; nh = list_first_entry(&neigh_entry->nexthop_list, struct mlxsw_sp_nexthop, neigh_list_node); + dev = mlxsw_sp_nexthop_dev(nh); - n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); + n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev); if (!n) { - n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); + n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); @@ -4081,44 +4196,49 @@ mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, } } -static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh, - struct mlxsw_sp_rif *rif) +static void mlxsw_sp_nexthop_crif_init(struct mlxsw_sp_nexthop *nh, + struct mlxsw_sp_crif *crif) { - if (nh->rif) + if (nh->crif) return; - nh->rif = rif; - list_add(&nh->rif_list_node, &rif->nexthop_list); + nh->crif = crif; + list_add(&nh->crif_list_node, &crif->nexthop_list); } -static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop_crif_fini(struct mlxsw_sp_nexthop *nh) { - if (!nh->rif) + if (!nh->crif) return; - list_del(&nh->rif_list_node); - nh->rif = NULL; + list_del(&nh->crif_list_node); + nh->crif = NULL; } static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry; + struct net_device *dev; struct neighbour *n; u8 nud_state, dead; int err; + if (WARN_ON(!nh->crif->rif)) + return 0; + if (!nh->nhgi->gateway || nh->neigh_entry) return 0; + dev = mlxsw_sp_nexthop_dev(nh); /* Take a reference of neigh here ensuring that neigh would * not be destructed before the nexthop entry is finished. * The reference is taken either in neigh_lookup() or * in neigh_create() in case n is not found. */ - n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); + n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev); if (!n) { - n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); + n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); @@ -4197,15 +4317,20 @@ static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, struct mlxsw_sp_ipip_entry *ipip_entry) { + struct mlxsw_sp_crif *crif; bool removing; if (!nh->nhgi->gateway || nh->ipip_entry) return; + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, ipip_entry->ol_dev); + if (WARN_ON(!crif)) + return; + nh->ipip_entry = ipip_entry; removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev); __mlxsw_sp_nexthop_neigh_update(nh, removing); - mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common); + mlxsw_sp_nexthop_crif_init(nh, crif); } static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, @@ -4237,7 +4362,7 @@ static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; - struct mlxsw_sp_rif *rif; + struct mlxsw_sp_crif *crif; int err; ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); @@ -4251,11 +4376,15 @@ static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, dev); + if (!crif) + return 0; + + mlxsw_sp_nexthop_crif_init(nh, crif); + + if (!crif->rif) return 0; - mlxsw_sp_nexthop_rif_init(nh, rif); err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); if (err) goto err_neigh_init; @@ -4263,25 +4392,30 @@ static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, return 0; err_neigh_init: - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_crif_fini(nh); return err; } -static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop_type_rif_gone(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); break; case MLXSW_SP_NEXTHOP_TYPE_IPIP: - mlxsw_sp_nexthop_rif_fini(nh); mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); break; } } +static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); + mlxsw_sp_nexthop_crif_fini(nh); +} + static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh, @@ -4368,16 +4502,17 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp_nexthop *nh; bool removing; - list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) { + list_for_each_entry(nh, &rif->crif->nexthop_list, crif_list_node) { switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: removing = false; break; case MLXSW_SP_NEXTHOP_TYPE_IPIP: - removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev); + removing = !mlxsw_sp_ipip_netdev_ul_up(dev); break; default: WARN_ON(1); @@ -4389,25 +4524,14 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, } } -static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *old_rif, - struct mlxsw_sp_rif *new_rif) -{ - struct mlxsw_sp_nexthop *nh; - - list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list); - list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node) - nh->rif = new_rif; - mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); -} - static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { struct mlxsw_sp_nexthop *nh, *tmp; - list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) { - mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list, + crif_list_node) { + mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } } @@ -4427,7 +4551,7 @@ static int mlxsw_sp_adj_trap_entry_init(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true, MLXSW_REG_RATR_TYPE_ETHERNET, mlxsw_sp->router->adj_trap_index, - mlxsw_sp->router->lb_rif_index); + mlxsw_sp->router->lb_crif->rif->rif_index); mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action); mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); @@ -4743,21 +4867,19 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - u16 lb_rif_index = mlxsw_sp->router->lb_rif_index; - nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD; nh->should_offload = 1; /* While nexthops that discard packets do not forward packets * via an egress RIF, they still need to be programmed using a * valid RIF, so use the loopback RIF created during init. */ - nh->rif = mlxsw_sp->router->rifs[lb_rif_index]; + nh->crif = mlxsw_sp->router->lb_crif; } static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - nh->rif = NULL; + nh->crif = NULL; nh->should_offload = 0; } @@ -5491,7 +5613,7 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: return !!nh_group->nhgi->adj_index_valid; case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: - return !!nh_group->nhgi->nh_rif; + return !!mlxsw_sp_nhgi_rif(nh_group->nhgi); case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: @@ -5509,9 +5631,10 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, for (i = 0; i < nh_grp->nhgi->count; i++) { struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i]; + struct net_device *dev = mlxsw_sp_nexthop_dev(nh); struct fib6_info *rt = mlxsw_sp_rt6->rt; - if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev && + if (dev && dev == rt->fib6_nh->fib_nh_dev && ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, &rt->fib6_nh->fib_nh_gw6)) return nh; @@ -5752,7 +5875,8 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; adjacency_index = nhgi->adj_index; ecmp_size = nhgi->ecmp_size; - } else if (!nhgi->adj_index_valid && nhgi->count && nhgi->nh_rif) { + } else if (!nhgi->adj_index_valid && nhgi->count && + mlxsw_sp_nhgi_rif(nhgi)) { trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; adjacency_index = mlxsw_sp->router->adj_trap_index; ecmp_size = 1; @@ -5771,7 +5895,7 @@ static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { - struct mlxsw_sp_rif *rif = fib_entry->nh_group->nhgi->nh_rif; + struct mlxsw_sp_rif *rif = mlxsw_sp_nhgi_rif(fib_entry->nh_group->nhgi); enum mlxsw_reg_ralue_trap_action trap_action; char ralue_pl[MLXSW_REG_RALUE_LEN]; u16 trap_id = 0; @@ -7298,9 +7422,10 @@ static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) { + int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); int i, j; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + for (i = 0; i < max_vrs; i++) { struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; if (!mlxsw_sp_vr_is_used(vr)) @@ -7699,11 +7824,12 @@ static struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) { + int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); int i; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + for (i = 0; i < max_rifs; i++) if (mlxsw_sp->router->rifs[i] && - mlxsw_sp->router->rifs[i]->dev == dev) + mlxsw_sp_rif_dev_is(mlxsw_sp->router->rifs[i], dev)) return mlxsw_sp->router->rifs[i]; return NULL; @@ -7761,33 +7887,52 @@ static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { + /* Signal to nexthop cleanup that the RIF is going away. */ + rif->crif->rif = NULL; + mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index); mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif); mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); } +static bool __mlxsw_sp_dev_addr_list_empty(const struct net_device *dev) +{ + struct inet6_dev *inet6_dev; + struct in_device *idev; + + idev = __in_dev_get_rcu(dev); + if (idev && idev->ifa_list) + return false; + + inet6_dev = __in6_dev_get(dev); + if (inet6_dev && !list_empty(&inet6_dev->addr_list)) + return false; + + return true; +} + +static bool mlxsw_sp_dev_addr_list_empty(const struct net_device *dev) +{ + bool addr_list_empty; + + rcu_read_lock(); + addr_list_empty = __mlxsw_sp_dev_addr_list_empty(dev); + rcu_read_unlock(); + + return addr_list_empty; +} + static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, unsigned long event) { - struct inet6_dev *inet6_dev; - bool addr_list_empty = true; - struct in_device *idev; + bool addr_list_empty; switch (event) { case NETDEV_UP: return rif == NULL; case NETDEV_DOWN: - rcu_read_lock(); - idev = __in_dev_get_rcu(dev); - if (idev && idev->ifa_list) - addr_list_empty = false; - - inet6_dev = __in6_dev_get(dev); - if (addr_list_empty && inet6_dev && - !list_empty(&inet6_dev->addr_list)) - addr_list_empty = false; - rcu_read_unlock(); + addr_list_empty = mlxsw_sp_dev_addr_list_empty(dev); /* macvlans do not have a RIF, but rather piggy back on the * RIF of their lower device. @@ -7796,7 +7941,7 @@ mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, return true; if (rif && addr_list_empty && - !netif_is_l3_slave(rif->dev)) + !netif_is_l3_slave(mlxsw_sp_rif_dev(rif))) return true; /* It is possible we already removed the RIF ourselves * if it was assigned to a netdev that is now a bridge @@ -7854,27 +7999,39 @@ static void mlxsw_sp_rif_index_free(struct mlxsw_sp *mlxsw_sp, u16 rif_index, static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, u16 vr_id, - struct net_device *l3_dev) + struct mlxsw_sp_crif *crif) { + struct net_device *l3_dev = crif ? crif->key.dev : NULL; struct mlxsw_sp_rif *rif; rif = kzalloc(rif_size, GFP_KERNEL); if (!rif) return NULL; - INIT_LIST_HEAD(&rif->nexthop_list); INIT_LIST_HEAD(&rif->neigh_list); if (l3_dev) { ether_addr_copy(rif->addr, l3_dev->dev_addr); rif->mtu = l3_dev->mtu; - rif->dev = l3_dev; } rif->vr_id = vr_id; rif->rif_index = rif_index; + if (crif) { + rif->crif = crif; + crif->rif = rif; + } return rif; } +static void mlxsw_sp_rif_free(struct mlxsw_sp_rif *rif) +{ + WARN_ON(!list_empty(&rif->neigh_list)); + + if (rif->crif) + rif->crif->rif = NULL; + kfree(rif); +} + struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index) { @@ -7893,7 +8050,8 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) { - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev); + struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev); struct mlxsw_sp_vr *ul_vr; ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL); @@ -8070,12 +8228,18 @@ mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev) int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) { - return rif->dev->ifindex; + return mlxsw_sp_rif_dev(rif)->ifindex; } -const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) +bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif) +{ + return !!mlxsw_sp_rif_dev(rif); +} + +bool mlxsw_sp_rif_dev_is(const struct mlxsw_sp_rif *rif, + const struct net_device *dev) { - return rif->dev; + return mlxsw_sp_rif_dev(rif) == dev; } static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif) @@ -8083,7 +8247,7 @@ static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif) struct rtnl_hw_stats64 stats = {}; if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats)) - netdev_offload_xstats_push_delta(rif->dev, + netdev_offload_xstats_push_delta(mlxsw_sp_rif_dev(rif), NETDEV_OFFLOAD_XSTATS_TYPE_L3, &stats); } @@ -8098,6 +8262,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_ops *ops; struct mlxsw_sp_fid *fid = NULL; enum mlxsw_sp_rif_type type; + struct mlxsw_sp_crif *crif; struct mlxsw_sp_rif *rif; struct mlxsw_sp_vr *vr; u16 rif_index; @@ -8117,12 +8282,18 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, goto err_rif_index_alloc; } - rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev); + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, params->dev); + if (WARN_ON(!crif)) { + err = -ENOENT; + goto err_crif_lookup; + } + + rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, crif); if (!rif) { err = -ENOMEM; goto err_rif_alloc; } - dev_hold(rif->dev); + dev_hold(params->dev); mlxsw_sp->router->rifs[rif_index] = rif; rif->mlxsw_sp = mlxsw_sp; rif->ops = ops; @@ -8150,12 +8321,12 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, goto err_mr_rif_add; } - if (netdev_offload_xstats_enabled(rif->dev, + if (netdev_offload_xstats_enabled(params->dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { err = mlxsw_sp_router_port_l3_stats_enable(rif); if (err) goto err_stats_enable; - mlxsw_sp_router_hwstats_notify_schedule(rif->dev); + mlxsw_sp_router_hwstats_notify_schedule(params->dev); } else { mlxsw_sp_rif_counters_alloc(rif); } @@ -8173,9 +8344,10 @@ err_configure: mlxsw_sp_fid_put(fid); err_fid_get: mlxsw_sp->router->rifs[rif_index] = NULL; - dev_put(rif->dev); - kfree(rif); + dev_put(params->dev); + mlxsw_sp_rif_free(rif); err_rif_alloc: +err_crif_lookup: mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); err_rif_index_alloc: vr->rif_count--; @@ -8185,8 +8357,10 @@ err_rif_index_alloc: static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); const struct mlxsw_sp_rif_ops *ops = rif->ops; struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_crif *crif = rif->crif; struct mlxsw_sp_fid *fid = rif->fid; u8 rif_entries = rif->rif_entries; u16 rif_index = rif->rif_index; @@ -8197,11 +8371,10 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); vr = &mlxsw_sp->router->vrs[rif->vr_id]; - if (netdev_offload_xstats_enabled(rif->dev, - NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { + if (netdev_offload_xstats_enabled(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { mlxsw_sp_rif_push_l3_stats(rif); mlxsw_sp_router_port_l3_stats_disable(rif); - mlxsw_sp_router_hwstats_notify_schedule(rif->dev); + mlxsw_sp_router_hwstats_notify_schedule(dev); } else { mlxsw_sp_rif_counters_free(rif); } @@ -8213,11 +8386,14 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) /* Loopback RIFs are not associated with a FID. */ mlxsw_sp_fid_put(fid); mlxsw_sp->router->rifs[rif->rif_index] = NULL; - dev_put(rif->dev); - kfree(rif); + dev_put(dev); + mlxsw_sp_rif_free(rif); mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); vr->rif_count--; mlxsw_sp_vr_put(mlxsw_sp, vr); + + if (crif->can_destroy) + mlxsw_sp_crif_free(crif); } void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, @@ -8549,25 +8725,20 @@ __mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) mlxsw_sp_rif_subport_put(rif); } -int -mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, - struct net_device *l3_dev, - struct netlink_ext_ack *extack) +static int +mlxsw_sp_port_vlan_router_join_existing(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, + struct net_device *l3_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_rif *rif; - int err = 0; - mutex_lock(&mlxsw_sp->router->lock); - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); - if (!rif) - goto out; + lockdep_assert_held(&mlxsw_sp->router->lock); - err = __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev, - extack); -out: - mutex_unlock(&mlxsw_sp->router->lock); - return err; + if (!mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev)) + return 0; + + return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev, + extack); } void @@ -8874,8 +9045,8 @@ out: return notifier_from_errno(err); } -int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr) { struct in_validator_info *ivi = (struct in_validator_info *) ptr; struct net_device *dev = ivi->ivi_dev->dev; @@ -8957,8 +9128,8 @@ static int mlxsw_sp_inet6addr_event(struct notifier_block *nb, return NOTIFY_DONE; } -int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr) { struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr; struct net_device *dev = i6vi->i6vi_dev->dev; @@ -9004,7 +9175,7 @@ mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { - struct net_device *dev = rif->dev; + struct net_device *dev = mlxsw_sp_rif_dev(rif); u8 old_mac_profile; u16 fid_index; int err; @@ -9088,6 +9259,104 @@ static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif, return -ENOBUFS; } +static bool mlxsw_sp_router_netdevice_interesting(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + struct vlan_dev_priv *vlan; + + if (netif_is_lag_master(dev) || + netif_is_bridge_master(dev) || + mlxsw_sp_port_dev_check(dev) || + mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev) || + netif_is_l3_master(dev)) + return true; + + if (!is_vlan_dev(dev)) + return false; + + vlan = vlan_dev_priv(dev); + return netif_is_lag_master(vlan->real_dev) || + netif_is_bridge_master(vlan->real_dev) || + mlxsw_sp_port_dev_check(vlan->real_dev); +} + +static struct mlxsw_sp_crif * +mlxsw_sp_crif_register(struct mlxsw_sp_router *router, struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + int err; + + if (WARN_ON(mlxsw_sp_crif_lookup(router, dev))) + return NULL; + + crif = mlxsw_sp_crif_alloc(dev); + if (!crif) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_crif_insert(router, crif); + if (err) + goto err_netdev_insert; + + return crif; + +err_netdev_insert: + mlxsw_sp_crif_free(crif); + return ERR_PTR(err); +} + +static void mlxsw_sp_crif_unregister(struct mlxsw_sp_router *router, + struct mlxsw_sp_crif *crif) +{ + struct mlxsw_sp_nexthop *nh, *tmp; + + mlxsw_sp_crif_remove(router, crif); + + list_for_each_entry_safe(nh, tmp, &crif->nexthop_list, crif_list_node) + mlxsw_sp_nexthop_type_fini(router->mlxsw_sp, nh); + + if (crif->rif) + crif->can_destroy = true; + else + mlxsw_sp_crif_free(crif); +} + +static int mlxsw_sp_netdevice_register(struct mlxsw_sp_router *router, + struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + + if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev)) + return 0; + + crif = mlxsw_sp_crif_register(router, dev); + return PTR_ERR_OR_ZERO(crif); +} + +static void mlxsw_sp_netdevice_unregister(struct mlxsw_sp_router *router, + struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + + if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev)) + return; + + /* netdev_run_todo(), by way of netdev_wait_allrefs_any(), rebroadcasts + * the NETDEV_UNREGISTER message, so we can get here twice. If that's + * what happened, the netdevice state is NETREG_UNREGISTERED. In that + * case, we expect to have collected the CRIF already, and warn if it + * still exists. Otherwise we expect the CRIF to exist. + */ + crif = mlxsw_sp_crif_lookup(router, dev); + if (dev->reg_state == NETREG_UNREGISTERED) { + if (!WARN_ON(crif)) + return; + } + if (WARN_ON(!crif)) + return; + + mlxsw_sp_crif_unregister(router, crif); +} + static bool mlxsw_sp_is_offload_xstats_event(unsigned long event) { switch (event) { @@ -9254,6 +9523,46 @@ mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, return err; } +static int +mlxsw_sp_port_vid_router_join_existing(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid, struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, + vid); + if (WARN_ON(!mlxsw_sp_port_vlan)) + return -EINVAL; + + return mlxsw_sp_port_vlan_router_join_existing(mlxsw_sp_port_vlan, + dev, extack); +} + +static int __mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev, + struct netlink_ext_ack *extack) +{ + u16 default_vid = MLXSW_SP_DEFAULT_VID; + + return mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, + default_vid, lag_dev, + extack); +} + +int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev, + struct netlink_ext_ack *extack) +{ + int err; + + mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock); + err = __mlxsw_sp_router_port_join_lag(mlxsw_sp_port, lag_dev, extack); + mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock); + + return err; +} + static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -9267,6 +9576,15 @@ static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, mutex_lock(&mlxsw_sp->router->lock); + if (event == NETDEV_REGISTER) { + err = mlxsw_sp_netdevice_register(router, dev); + if (err) + /* No need to roll this back, UNREGISTER will collect it + * anyhow. + */ + goto out; + } + if (mlxsw_sp_is_offload_xstats_event(event)) err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev, event, ptr); @@ -9281,6 +9599,10 @@ static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); + if (event == NETDEV_UNREGISTER) + mlxsw_sp_netdevice_unregister(router, dev); + +out: mutex_unlock(&mlxsw_sp->router->lock); return notifier_from_errno(err); @@ -9300,15 +9622,16 @@ static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct netdev_nested_priv priv = { .data = (void *)rif, }; - if (!netif_is_macvlan_port(rif->dev)) + if (!netif_is_macvlan_port(dev)) return 0; - netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n"); - return netdev_walk_all_upper_dev_rcu(rif->dev, + netdev_warn(dev, "Router interface is deleted. Upper macvlans will not work\n"); + return netdev_walk_all_upper_dev_rcu(dev, __mlxsw_sp_rif_macvlan_flush, &priv); } @@ -9329,6 +9652,7 @@ static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif, static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_rif_subport *rif_subport; char ritr_pl[MLXSW_REG_RITR_LEN]; @@ -9336,8 +9660,8 @@ static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable) rif_subport = mlxsw_sp_rif_subport_rif(rif); mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF, - rif->rif_index, rif->vr_id, rif->dev->mtu); - mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); + rif->rif_index, rif->vr_id, dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr); mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id); efid = mlxsw_sp_fid_index(rif->fid); mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag, @@ -9350,6 +9674,7 @@ static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable) static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); u8 mac_profile; int err; @@ -9363,7 +9688,7 @@ static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif, if (err) goto err_rif_subport_op; - err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); if (err) goto err_rif_fdb_op; @@ -9375,7 +9700,7 @@ static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif, return 0; err_fid_rif_set: - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), false); err_rif_fdb_op: mlxsw_sp_rif_subport_op(rif, false); @@ -9386,10 +9711,11 @@ err_rif_subport_op: static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp_fid *fid = rif->fid; mlxsw_sp_fid_rif_unset(fid); - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(fid), false); mlxsw_sp_rif_macvlan_flush(rif); mlxsw_sp_rif_subport_op(rif, false); @@ -9415,12 +9741,13 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = { static int mlxsw_sp_rif_fid_op(struct mlxsw_sp_rif *rif, u16 fid, bool enable) { enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_FID_IF; + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; char ritr_pl[MLXSW_REG_RITR_LEN]; mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id, - rif->dev->mtu); - mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); + dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr); mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id); mlxsw_reg_ritr_fid_if_fid_set(ritr_pl, fid); @@ -9435,6 +9762,7 @@ u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; u16 fid_index = mlxsw_sp_fid_index(rif->fid); u8 mac_profile; @@ -9460,7 +9788,7 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif, if (err) goto err_fid_bc_flood_set; - err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); if (err) goto err_rif_fdb_op; @@ -9472,7 +9800,7 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif, return 0; err_fid_rif_set: - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), false); err_rif_fdb_op: mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, @@ -9489,12 +9817,13 @@ err_rif_fid_op: static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); u16 fid_index = mlxsw_sp_fid_index(rif->fid); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_fid *fid = rif->fid; mlxsw_sp_fid_rif_unset(fid); - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(fid), false); mlxsw_sp_rif_macvlan_flush(rif); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, @@ -9509,7 +9838,9 @@ static struct mlxsw_sp_fid * mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { - return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex); + int rif_ifindex = mlxsw_sp_rif_dev_ifindex(rif); + + return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif_ifindex); } static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) @@ -9517,7 +9848,7 @@ static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) struct switchdev_notifier_fdb_info info = {}; struct net_device *dev; - dev = br_fdb_find_port(rif->dev, mac, 0); + dev = br_fdb_find_port(mlxsw_sp_rif_dev(rif), mac, 0); if (!dev) return; @@ -9540,17 +9871,18 @@ static struct mlxsw_sp_fid * mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct net_device *br_dev; u16 vid; int err; - if (is_vlan_dev(rif->dev)) { - vid = vlan_dev_vlan_id(rif->dev); - br_dev = vlan_dev_real_dev(rif->dev); + if (is_vlan_dev(dev)) { + vid = vlan_dev_vlan_id(dev); + br_dev = vlan_dev_real_dev(dev); if (WARN_ON(!netif_is_bridge_master(br_dev))) return ERR_PTR(-EINVAL); } else { - err = br_vlan_get_pvid(rif->dev, &vid); + err = br_vlan_get_pvid(dev, &vid); if (err < 0 || !vid) { NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID"); return ERR_PTR(-EINVAL); @@ -9562,12 +9894,13 @@ mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) { + struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); struct switchdev_notifier_fdb_info info = {}; u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); struct net_device *br_dev; struct net_device *dev; - br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev; + br_dev = is_vlan_dev(rif_dev) ? vlan_dev_real_dev(rif_dev) : rif_dev; dev = br_fdb_find_port(br_dev, mac, vid); if (!dev) return; @@ -9581,11 +9914,12 @@ static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) static int mlxsw_sp_rif_vlan_op(struct mlxsw_sp_rif *rif, u16 vid, u16 efid, bool enable) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; char ritr_pl[MLXSW_REG_RITR_LEN]; mlxsw_reg_ritr_vlan_if_pack(ritr_pl, enable, rif->rif_index, rif->vr_id, - rif->dev->mtu, rif->dev->dev_addr, + dev->mtu, dev->dev_addr, rif->mac_profile_id, vid, efid); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); @@ -9594,6 +9928,7 @@ static int mlxsw_sp_rif_vlan_op(struct mlxsw_sp_rif *rif, u16 vid, u16 efid, static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid, struct netlink_ext_ack *extack) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; u8 mac_profile; @@ -9619,7 +9954,7 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid, if (err) goto err_fid_bc_flood_set; - err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); if (err) goto err_rif_fdb_op; @@ -9631,7 +9966,7 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid, return 0; err_fid_rif_set: - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), false); err_rif_fdb_op: mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, @@ -9648,11 +9983,12 @@ err_rif_vlan_fid_op: static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; mlxsw_sp_fid_rif_unset(rif->fid); - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), false); mlxsw_sp_rif_macvlan_flush(rif); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, @@ -9719,12 +10055,13 @@ mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct net_device *dev = mlxsw_sp_rif_dev(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_vr *ul_vr; int err; - ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL); + ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, extack); if (IS_ERR(ul_vr)) return PTR_ERR(ul_vr); @@ -9786,6 +10123,7 @@ mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable) static struct mlxsw_sp_rif * mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, + struct mlxsw_sp_crif *ul_crif, struct netlink_ext_ack *extack) { struct mlxsw_sp_rif *ul_rif; @@ -9799,7 +10137,8 @@ mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, return ERR_PTR(err); } - ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL); + ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, + ul_crif); if (!ul_rif) { err = -ENOMEM; goto err_rif_alloc; @@ -9817,7 +10156,7 @@ mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, ul_rif_op_err: mlxsw_sp->router->rifs[rif_index] = NULL; - kfree(ul_rif); + mlxsw_sp_rif_free(ul_rif); err_rif_alloc: mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); return ERR_PTR(err); @@ -9832,12 +10171,13 @@ static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif) atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count); mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false); mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL; - kfree(ul_rif); + mlxsw_sp_rif_free(ul_rif); mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); } static struct mlxsw_sp_rif * mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + struct mlxsw_sp_crif *ul_crif, struct netlink_ext_ack *extack) { struct mlxsw_sp_vr *vr; @@ -9850,7 +10190,7 @@ mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, if (refcount_inc_not_zero(&vr->ul_rif_refcnt)) return vr->ul_rif; - vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack); + vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, ul_crif, extack); if (IS_ERR(vr->ul_rif)) { err = PTR_ERR(vr->ul_rif); goto err_ul_rif_create; @@ -9888,7 +10228,7 @@ int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, int err = 0; mutex_lock(&mlxsw_sp->router->lock); - ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, NULL); if (IS_ERR(ul_rif)) { err = PTR_ERR(ul_rif); goto out; @@ -9918,12 +10258,13 @@ mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct net_device *dev = mlxsw_sp_rif_dev(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_rif *ul_rif; int err; - ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, extack); if (IS_ERR(ul_rif)) return PTR_ERR(ul_rif); @@ -10041,11 +10382,12 @@ err_rifs_table_init: static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) { + int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); int i; WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count)); - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + for (i = 0; i < max_rifs; i++) WARN_ON_ONCE(mlxsw_sp->router->rifs[i]); devl_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS); @@ -10444,28 +10786,41 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } -static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack) { - u16 lb_rif_index; + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct mlxsw_sp_rif *lb_rif; int err; + router->lb_crif = mlxsw_sp_crif_alloc(NULL); + if (!router->lb_crif) + return -ENOMEM; + /* Create a generic loopback RIF associated with the main table * (default VRF). Any table can be used, but the main table exists - * anyway, so we do not waste resources. + * anyway, so we do not waste resources. Loopback RIFs are usually + * created with a NULL CRIF, but this RIF is used as a fallback RIF + * for blackhole nexthops, and nexthops expect to have a valid CRIF. */ - err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, - &lb_rif_index); - if (err) - return err; - - mlxsw_sp->router->lb_rif_index = lb_rif_index; + lb_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, router->lb_crif, + extack); + if (IS_ERR(lb_rif)) { + err = PTR_ERR(lb_rif); + goto err_ul_rif_get; + } return 0; + +err_ul_rif_get: + mlxsw_sp_crif_free(router->lb_crif); + return err; } static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp) { - mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index); + mlxsw_sp_ul_rif_put(mlxsw_sp->router->lb_crif->rif); + mlxsw_sp_crif_free(mlxsw_sp->router->lb_crif); } static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp) @@ -10504,6 +10859,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack) { struct mlxsw_sp_router *router; + struct notifier_block *nb; int err; router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL); @@ -10525,14 +10881,19 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_router_init; - err = mlxsw_sp_rifs_init(mlxsw_sp); - if (err) - goto err_rifs_init; - err = mlxsw_sp->router_ops->ipips_init(mlxsw_sp); if (err) goto err_ipips_init; + err = rhashtable_init(&mlxsw_sp->router->crif_ht, + &mlxsw_sp_crif_ht_params); + if (err) + goto err_crif_ht_init; + + err = mlxsw_sp_rifs_init(mlxsw_sp); + if (err) + goto err_rifs_init; + err = rhashtable_init(&mlxsw_sp->router->nexthop_ht, &mlxsw_sp_nexthop_ht_params); if (err) @@ -10556,7 +10917,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_vrs_init; - err = mlxsw_sp_lb_rif_init(mlxsw_sp); + err = mlxsw_sp_lb_rif_init(mlxsw_sp, extack); if (err) goto err_lb_rif_init; @@ -10582,6 +10943,17 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_register_inet6addr_notifier; + router->inetaddr_valid_nb.notifier_call = mlxsw_sp_inetaddr_valid_event; + err = register_inetaddr_validator_notifier(&router->inetaddr_valid_nb); + if (err) + goto err_register_inetaddr_valid_notifier; + + nb = &router->inet6addr_valid_nb; + nb->notifier_call = mlxsw_sp_inet6addr_valid_event; + err = register_inet6addr_validator_notifier(nb); + if (err) + goto err_register_inet6addr_valid_notifier; + mlxsw_sp->router->netevent_nb.notifier_call = mlxsw_sp_router_netevent_event; err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb); @@ -10621,6 +10993,10 @@ err_register_fib_notifier: err_register_nexthop_notifier: unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); err_register_netevent_notifier: + unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb); +err_register_inet6addr_valid_notifier: + unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb); +err_register_inetaddr_valid_notifier: unregister_inet6addr_notifier(&router->inet6addr_nb); err_register_inet6addr_notifier: unregister_inetaddr_notifier(&router->inetaddr_nb); @@ -10643,10 +11019,12 @@ err_lpm_init: err_nexthop_group_ht_init: rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); err_nexthop_ht_init: - mlxsw_sp_ipips_fini(mlxsw_sp); -err_ipips_init: mlxsw_sp_rifs_fini(mlxsw_sp); err_rifs_init: + rhashtable_destroy(&mlxsw_sp->router->crif_ht); +err_crif_ht_init: + mlxsw_sp_ipips_fini(mlxsw_sp); +err_ipips_init: __mlxsw_sp_router_fini(mlxsw_sp); err_router_init: cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw); @@ -10658,15 +11036,18 @@ err_router_ops_init: void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_sp_router *router = mlxsw_sp->router; + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), - &mlxsw_sp->router->netdevice_nb); - unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), - &mlxsw_sp->router->fib_nb); + &router->netdevice_nb); + unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &router->fib_nb); unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), - &mlxsw_sp->router->nexthop_nb); - unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); - unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); - unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); + &router->nexthop_nb); + unregister_netevent_notifier(&router->netevent_nb); + unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb); + unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb); + unregister_inet6addr_notifier(&router->inet6addr_nb); + unregister_inetaddr_notifier(&router->inetaddr_nb); mlxsw_core_flush_owq(); mlxsw_sp_mp_hash_fini(mlxsw_sp); mlxsw_sp_neigh_fini(mlxsw_sp); @@ -10674,12 +11055,13 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_vrs_fini(mlxsw_sp); mlxsw_sp_mr_fini(mlxsw_sp); mlxsw_sp_lpm_fini(mlxsw_sp); - rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); - rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); - mlxsw_sp_ipips_fini(mlxsw_sp); + rhashtable_destroy(&router->nexthop_group_ht); + rhashtable_destroy(&router->nexthop_ht); mlxsw_sp_rifs_fini(mlxsw_sp); + rhashtable_destroy(&mlxsw_sp->router->crif_ht); + mlxsw_sp_ipips_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); - cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw); - mutex_destroy(&mlxsw_sp->router->lock); - kfree(mlxsw_sp->router); + cancel_delayed_work_sync(&router->nh_grp_activity_dw); + mutex_destroy(&router->lock); + kfree(router); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 37d6e4c80e6a..9a2669a08480 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -20,6 +20,7 @@ struct mlxsw_sp_router_nve_decap { struct mlxsw_sp_router { struct mlxsw_sp *mlxsw_sp; + struct rhashtable crif_ht; struct gen_pool *rifs_table; struct mlxsw_sp_rif **rifs; struct idr rif_mac_profiles_idr; @@ -52,12 +53,14 @@ struct mlxsw_sp_router { struct notifier_block inetaddr_nb; struct notifier_block inet6addr_nb; struct notifier_block netdevice_nb; + struct notifier_block inetaddr_valid_nb; + struct notifier_block inet6addr_valid_nb; const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_ipip_ops **ipip_ops_arr; struct mlxsw_sp_router_nve_decap nve_decap_config; struct mutex lock; /* Protects shared router resources */ struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx; - u16 lb_rif_index; + struct mlxsw_sp_crif *lb_crif; const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges; size_t adj_grp_size_ranges_count; struct delayed_work nh_grp_activity_dw; @@ -91,7 +94,9 @@ u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif); u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); -const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); +bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif); +bool mlxsw_sp_rif_dev_is(const struct mlxsw_sp_rif *rif, + const struct net_device *dev); int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir, @@ -166,5 +171,8 @@ int mlxsw_sp_ipip_ecn_encap_init(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp); struct net_device * mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev); +int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev, + struct netlink_ext_ack *extack); #endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig index 24c994baad13..329e374b9539 100644 --- a/drivers/net/ethernet/microchip/Kconfig +++ b/drivers/net/ethernet/microchip/Kconfig @@ -46,7 +46,7 @@ config LAN743X tristate "LAN743x support" depends on PCI depends on PTP_1588_CLOCK_OPTIONAL - select PHYLIB + select FIXED_PHY select CRC16 select CRC32 help diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 176efbeae127..d6c9491537e4 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -58,7 +58,6 @@ struct enc28j60_net { struct mutex lock; struct sk_buff *tx_skb; struct work_struct tx_work; - struct work_struct irq_work; struct work_struct setrx_work; struct work_struct restart_work; u8 bank; /* current register bank selected */ @@ -1118,10 +1117,9 @@ static int enc28j60_rx_interrupt(struct net_device *ndev) return ret; } -static void enc28j60_irq_work_handler(struct work_struct *work) +static irqreturn_t enc28j60_irq(int irq, void *dev_id) { - struct enc28j60_net *priv = - container_of(work, struct enc28j60_net, irq_work); + struct enc28j60_net *priv = dev_id; struct net_device *ndev = priv->netdev; int intflags, loop; @@ -1225,6 +1223,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work) /* re-enable interrupts */ locked_reg_bfset(priv, EIE, EIE_INTIE); + + return IRQ_HANDLED; } /* @@ -1309,22 +1309,6 @@ static void enc28j60_tx_work_handler(struct work_struct *work) enc28j60_hw_tx(priv); } -static irqreturn_t enc28j60_irq(int irq, void *dev_id) -{ - struct enc28j60_net *priv = dev_id; - - /* - * Can't do anything in interrupt context because we need to - * block (spi_sync() is blocking) so fire of the interrupt - * handling workqueue. - * Remember that we access enc28j60 registers through SPI bus - * via spi_sync() call. - */ - schedule_work(&priv->irq_work); - - return IRQ_HANDLED; -} - static void enc28j60_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct enc28j60_net *priv = netdev_priv(ndev); @@ -1559,7 +1543,6 @@ static int enc28j60_probe(struct spi_device *spi) mutex_init(&priv->lock); INIT_WORK(&priv->tx_work, enc28j60_tx_work_handler); INIT_WORK(&priv->setrx_work, enc28j60_setrx_work_handler); - INIT_WORK(&priv->irq_work, enc28j60_irq_work_handler); INIT_WORK(&priv->restart_work, enc28j60_restart_work_handler); spi_set_drvdata(spi, priv); /* spi to priv reference */ SET_NETDEV_DEV(dev, &spi->dev); @@ -1578,7 +1561,8 @@ static int enc28j60_probe(struct spi_device *spi) /* Board setup must set the relevant edge trigger type; * level triggers won't currently work. */ - ret = request_irq(spi->irq, enc28j60_irq, 0, DRV_NAME, priv); + ret = request_threaded_irq(spi->irq, NULL, enc28j60_irq, IRQF_ONESHOT, + DRV_NAME, priv); if (ret < 0) { if (netif_msg_probe(priv)) dev_err(&spi->dev, "request irq %d failed (ret = %d)\n", diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 957d96a91a8a..a36f6369f132 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -144,6 +144,18 @@ static int lan743x_csr_light_reset(struct lan743x_adapter *adapter) !(data & HW_CFG_LRST_), 100000, 10000000); } +static int lan743x_csr_wait_for_bit_atomic(struct lan743x_adapter *adapter, + int offset, u32 bit_mask, + int target_value, int udelay_min, + int udelay_max, int count) +{ + u32 data; + + return readx_poll_timeout_atomic(LAN743X_CSR_READ_OP, offset, data, + target_value == !!(data & bit_mask), + udelay_max, udelay_min * count); +} + static int lan743x_csr_wait_for_bit(struct lan743x_adapter *adapter, int offset, u32 bit_mask, int target_value, int usleep_min, @@ -152,7 +164,7 @@ static int lan743x_csr_wait_for_bit(struct lan743x_adapter *adapter, u32 data; return readx_poll_timeout(LAN743X_CSR_READ_OP, offset, data, - target_value == ((data & bit_mask) ? 1 : 0), + target_value == !!(data & bit_mask), usleep_max, usleep_min * count); } @@ -160,16 +172,13 @@ static int lan743x_csr_init(struct lan743x_adapter *adapter) { struct lan743x_csr *csr = &adapter->csr; resource_size_t bar_start, bar_length; - int result; bar_start = pci_resource_start(adapter->pdev, 0); bar_length = pci_resource_len(adapter->pdev, 0); csr->csr_address = devm_ioremap(&adapter->pdev->dev, bar_start, bar_length); - if (!csr->csr_address) { - result = -ENOMEM; - goto clean_up; - } + if (!csr->csr_address) + return -ENOMEM; csr->id_rev = lan743x_csr_read(adapter, ID_REV); csr->fpga_rev = lan743x_csr_read(adapter, FPGA_REV); @@ -177,10 +186,8 @@ static int lan743x_csr_init(struct lan743x_adapter *adapter) "ID_REV = 0x%08X, FPGA_REV = %d.%d\n", csr->id_rev, FPGA_REV_GET_MAJOR_(csr->fpga_rev), FPGA_REV_GET_MINOR_(csr->fpga_rev)); - if (!ID_REV_IS_VALID_CHIP_ID_(csr->id_rev)) { - result = -ENODEV; - goto clean_up; - } + if (!ID_REV_IS_VALID_CHIP_ID_(csr->id_rev)) + return -ENODEV; csr->flags = LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR; switch (csr->id_rev & ID_REV_CHIP_REV_MASK_) { @@ -193,12 +200,7 @@ static int lan743x_csr_init(struct lan743x_adapter *adapter) break; } - result = lan743x_csr_light_reset(adapter); - if (result) - goto clean_up; - return 0; -clean_up: - return result; + return lan743x_csr_light_reset(adapter); } static void lan743x_intr_software_isr(struct lan743x_adapter *adapter) @@ -746,8 +748,8 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter, u32 dp_sel; int i; - if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, - 1, 40, 100, 100)) + if (lan743x_csr_wait_for_bit_atomic(adapter, DP_SEL, DP_SEL_DPRDY_, + 1, 40, 100, 100)) return -EIO; dp_sel = lan743x_csr_read(adapter, DP_SEL); dp_sel &= ~DP_SEL_MASK_; @@ -758,8 +760,9 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter, lan743x_csr_write(adapter, DP_ADDR, addr + i); lan743x_csr_write(adapter, DP_DATA_0, buf[i]); lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_); - if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, - 1, 40, 100, 100)) + if (lan743x_csr_wait_for_bit_atomic(adapter, DP_SEL, + DP_SEL_DPRDY_, + 1, 40, 100, 100)) return -EIO; } diff --git a/drivers/net/ethernet/microchip/lan966x/Kconfig b/drivers/net/ethernet/microchip/lan966x/Kconfig index 571e6d4da1e9..f9ebffc04eb8 100644 --- a/drivers/net/ethernet/microchip/lan966x/Kconfig +++ b/drivers/net/ethernet/microchip/lan966x/Kconfig @@ -10,3 +10,14 @@ config LAN966X_SWITCH select VCAP help This driver supports the Lan966x network switch device. + +config LAN966X_DCB + bool "Data Center Bridging (DCB) support" + depends on LAN966X_SWITCH && DCB + default y + help + Say Y here if you want to use Data Center Bridging (DCB) in the + driver. This can be used to assign priority to traffic, based on + DSCP and PCP. + + If unsure, set to Y. diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile index 7b0cda4ffa6b..3b6ac331691d 100644 --- a/drivers/net/ethernet/microchip/lan966x/Makefile +++ b/drivers/net/ethernet/microchip/lan966x/Makefile @@ -15,6 +15,7 @@ lan966x-switch-objs := lan966x_main.o lan966x_phylink.o lan966x_port.o \ lan966x_xdp.o lan966x_vcap_impl.o lan966x_vcap_ag_api.o \ lan966x_tc_flower.o lan966x_goto.o +lan966x-switch-$(CONFIG_LAN966X_DCB) += lan966x_dcb.o lan966x-switch-$(CONFIG_DEBUG_FS) += lan966x_vcap_debugfs.o # Provide include files diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c new file mode 100644 index 000000000000..ed2d96d7908e --- /dev/null +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c @@ -0,0 +1,365 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include "lan966x_main.h" + +enum lan966x_dcb_apptrust_values { + LAN966X_DCB_APPTRUST_EMPTY, + LAN966X_DCB_APPTRUST_DSCP, + LAN966X_DCB_APPTRUST_PCP, + LAN966X_DCB_APPTRUST_DSCP_PCP, + __LAN966X_DCB_APPTRUST_MAX +}; + +static const struct lan966x_dcb_apptrust { + u8 selectors[IEEE_8021QAZ_APP_SEL_MAX + 1]; + int nselectors; +} *lan966x_port_apptrust[NUM_PHYS_PORTS]; + +static const char *lan966x_dcb_apptrust_names[__LAN966X_DCB_APPTRUST_MAX] = { + [LAN966X_DCB_APPTRUST_EMPTY] = "empty", + [LAN966X_DCB_APPTRUST_DSCP] = "dscp", + [LAN966X_DCB_APPTRUST_PCP] = "pcp", + [LAN966X_DCB_APPTRUST_DSCP_PCP] = "dscp pcp" +}; + +/* Lan966x supported apptrust policies */ +static const struct lan966x_dcb_apptrust + lan966x_dcb_apptrust_policies[__LAN966X_DCB_APPTRUST_MAX] = { + /* Empty *must* be first */ + [LAN966X_DCB_APPTRUST_EMPTY] = { { 0 }, 0 }, + [LAN966X_DCB_APPTRUST_DSCP] = { { IEEE_8021QAZ_APP_SEL_DSCP }, 1 }, + [LAN966X_DCB_APPTRUST_PCP] = { { DCB_APP_SEL_PCP }, 1 }, + [LAN966X_DCB_APPTRUST_DSCP_PCP] = { { IEEE_8021QAZ_APP_SEL_DSCP, + DCB_APP_SEL_PCP }, 2 }, +}; + +static bool lan966x_dcb_apptrust_contains(int portno, u8 selector) +{ + const struct lan966x_dcb_apptrust *conf = lan966x_port_apptrust[portno]; + + for (int i = 0; i < conf->nselectors; i++) + if (conf->selectors[i] == selector) + return true; + + return false; +} + +static void lan966x_dcb_app_update(struct net_device *dev) +{ + struct dcb_ieee_app_prio_map dscp_rewr_map = {0}; + struct dcb_rewr_prio_pcp_map pcp_rewr_map = {0}; + struct lan966x_port *port = netdev_priv(dev); + struct lan966x_port_qos qos = {0}; + struct dcb_app app_itr; + bool dscp_rewr = false; + bool pcp_rewr = false; + + /* Get pcp ingress mapping */ + for (int i = 0; i < ARRAY_SIZE(qos.pcp.map); i++) { + app_itr.selector = DCB_APP_SEL_PCP; + app_itr.protocol = i; + qos.pcp.map[i] = dcb_getapp(dev, &app_itr); + } + + /* Get dscp ingress mapping */ + for (int i = 0; i < ARRAY_SIZE(qos.dscp.map); i++) { + app_itr.selector = IEEE_8021QAZ_APP_SEL_DSCP; + app_itr.protocol = i; + qos.dscp.map[i] = dcb_getapp(dev, &app_itr); + } + + /* Get default prio */ + qos.default_prio = dcb_ieee_getapp_default_prio_mask(dev); + if (qos.default_prio) + qos.default_prio = fls(qos.default_prio) - 1; + + /* Get pcp rewrite mapping */ + dcb_getrewr_prio_pcp_mask_map(dev, &pcp_rewr_map); + for (int i = 0; i < ARRAY_SIZE(pcp_rewr_map.map); i++) { + if (!pcp_rewr_map.map[i]) + continue; + + pcp_rewr = true; + qos.pcp_rewr.map[i] = fls(pcp_rewr_map.map[i]) - 1; + } + + /* Get dscp rewrite mapping */ + dcb_getrewr_prio_dscp_mask_map(dev, &dscp_rewr_map); + for (int i = 0; i < ARRAY_SIZE(dscp_rewr_map.map); i++) { + if (!dscp_rewr_map.map[i]) + continue; + + dscp_rewr = true; + qos.dscp_rewr.map[i] = fls64(dscp_rewr_map.map[i]) - 1; + } + + /* Enable use of pcp for queue classification */ + if (lan966x_dcb_apptrust_contains(port->chip_port, DCB_APP_SEL_PCP)) { + qos.pcp.enable = true; + + if (pcp_rewr) + qos.pcp_rewr.enable = true; + } + + /* Enable use of dscp for queue classification */ + if (lan966x_dcb_apptrust_contains(port->chip_port, IEEE_8021QAZ_APP_SEL_DSCP)) { + qos.dscp.enable = true; + + if (dscp_rewr) + qos.dscp_rewr.enable = true; + } + + lan966x_port_qos_set(port, &qos); +} + +/* DSCP mapping is global for all ports, so set and delete app entries are + * replicated for each port. + */ +static int lan966x_dcb_ieee_dscp_setdel(struct net_device *dev, + struct dcb_app *app, + int (*setdel)(struct net_device *, + struct dcb_app *)) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + int err; + + for (int i = 0; i < NUM_PHYS_PORTS; i++) { + port = lan966x->ports[i]; + if (!port) + continue; + + err = setdel(port->dev, app); + if (err) + return err; + } + + return 0; +} + +static int lan966x_dcb_app_validate(struct net_device *dev, + const struct dcb_app *app) +{ + int err = 0; + + switch (app->selector) { + /* Default priority checks */ + case IEEE_8021QAZ_APP_SEL_ETHERTYPE: + if (app->protocol) + err = -EINVAL; + else if (app->priority >= NUM_PRIO_QUEUES) + err = -ERANGE; + break; + /* Dscp checks */ + case IEEE_8021QAZ_APP_SEL_DSCP: + if (app->protocol >= LAN966X_PORT_QOS_DSCP_COUNT) + err = -EINVAL; + else if (app->priority >= NUM_PRIO_QUEUES) + err = -ERANGE; + break; + /* Pcp checks */ + case DCB_APP_SEL_PCP: + if (app->protocol >= LAN966X_PORT_QOS_PCP_DEI_COUNT) + err = -EINVAL; + else if (app->priority >= NUM_PRIO_QUEUES) + err = -ERANGE; + break; + default: + err = -EINVAL; + break; + } + + if (err) + netdev_err(dev, "Invalid entry: %d:%d\n", app->protocol, + app->priority); + + return err; +} + +static int lan966x_dcb_ieee_delapp(struct net_device *dev, struct dcb_app *app) +{ + int err; + + if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP) + err = lan966x_dcb_ieee_dscp_setdel(dev, app, dcb_ieee_delapp); + else + err = dcb_ieee_delapp(dev, app); + + if (err) + return err; + + lan966x_dcb_app_update(dev); + + return 0; +} + +static int lan966x_dcb_ieee_setapp(struct net_device *dev, struct dcb_app *app) +{ + struct dcb_app app_itr; + int err; + u8 prio; + + err = lan966x_dcb_app_validate(dev, app); + if (err) + return err; + + /* Delete current mapping, if it exists */ + prio = dcb_getapp(dev, app); + if (prio) { + app_itr = *app; + app_itr.priority = prio; + lan966x_dcb_ieee_delapp(dev, &app_itr); + } + + if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP) + err = lan966x_dcb_ieee_dscp_setdel(dev, app, dcb_ieee_setapp); + else + err = dcb_ieee_setapp(dev, app); + + if (err) + return err; + + lan966x_dcb_app_update(dev); + + return 0; +} + +static int lan966x_dcb_apptrust_validate(struct net_device *dev, + u8 *selectors, + int nselectors) +{ + for (int i = 0; i < ARRAY_SIZE(lan966x_dcb_apptrust_policies); i++) { + bool match; + + if (lan966x_dcb_apptrust_policies[i].nselectors != nselectors) + continue; + + match = true; + for (int j = 0; j < nselectors; j++) { + if (lan966x_dcb_apptrust_policies[i].selectors[j] != + *(selectors + j)) { + match = false; + break; + } + } + if (match) + return i; + } + + netdev_err(dev, "Valid apptrust configurations are:\n"); + for (int i = 0; i < ARRAY_SIZE(lan966x_dcb_apptrust_names); i++) + pr_info("order: %s\n", lan966x_dcb_apptrust_names[i]); + + return -EOPNOTSUPP; +} + +static int lan966x_dcb_setapptrust(struct net_device *dev, + u8 *selectors, + int nselectors) +{ + struct lan966x_port *port = netdev_priv(dev); + int idx; + + idx = lan966x_dcb_apptrust_validate(dev, selectors, nselectors); + if (idx < 0) + return idx; + + lan966x_port_apptrust[port->chip_port] = &lan966x_dcb_apptrust_policies[idx]; + lan966x_dcb_app_update(dev); + + return 0; +} + +static int lan966x_dcb_getapptrust(struct net_device *dev, u8 *selectors, + int *nselectors) +{ + struct lan966x_port *port = netdev_priv(dev); + const struct lan966x_dcb_apptrust *trust; + + trust = lan966x_port_apptrust[port->chip_port]; + + memcpy(selectors, trust->selectors, trust->nselectors); + *nselectors = trust->nselectors; + + return 0; +} + +static int lan966x_dcb_delrewr(struct net_device *dev, struct dcb_app *app) +{ + int err; + + if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP) + err = lan966x_dcb_ieee_dscp_setdel(dev, app, dcb_delrewr); + else + err = dcb_delrewr(dev, app); + + if (err < 0) + return err; + + lan966x_dcb_app_update(dev); + + return 0; +} + +static int lan966x_dcb_setrewr(struct net_device *dev, struct dcb_app *app) +{ + struct dcb_app app_itr; + u16 proto; + int err; + + err = lan966x_dcb_app_validate(dev, app); + if (err) + goto out; + + /* Delete current mapping, if it exists. */ + proto = dcb_getrewr(dev, app); + if (proto) { + app_itr = *app; + app_itr.protocol = proto; + lan966x_dcb_delrewr(dev, &app_itr); + } + + if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP) + err = lan966x_dcb_ieee_dscp_setdel(dev, app, dcb_setrewr); + else + err = dcb_setrewr(dev, app); + + if (err) + goto out; + + lan966x_dcb_app_update(dev); + +out: + return err; +} + +static const struct dcbnl_rtnl_ops lan966x_dcbnl_ops = { + .ieee_setapp = lan966x_dcb_ieee_setapp, + .ieee_delapp = lan966x_dcb_ieee_delapp, + .dcbnl_setapptrust = lan966x_dcb_setapptrust, + .dcbnl_getapptrust = lan966x_dcb_getapptrust, + .dcbnl_setrewr = lan966x_dcb_setrewr, + .dcbnl_delrewr = lan966x_dcb_delrewr, +}; + +void lan966x_dcb_init(struct lan966x *lan966x) +{ + for (int p = 0; p < lan966x->num_phys_ports; ++p) { + struct lan966x_port *port; + + port = lan966x->ports[p]; + if (!port) + continue; + + port->dev->dcbnl_ops = &lan966x_dcbnl_ops; + + lan966x_port_apptrust[port->chip_port] = + &lan966x_dcb_apptrust_policies[LAN966X_DCB_APPTRUST_DSCP_PCP]; + + /* Enable DSCP classification based on classified QoS class and + * DP, for all DSCP values, for all ports. + */ + lan966x_port_qos_dscp_rewr_mode_set(port, + LAN966X_PORT_QOS_REWR_DSCP_ALL); + } +} diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index ee2698698d71..fbb0bb4594cd 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -818,6 +818,7 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p, port->phylink_config.type = PHYLINK_NETDEV; port->phylink_pcs.poll = true; port->phylink_pcs.ops = &lan966x_phylink_pcs_ops; + port->phylink_pcs.neg_mode = true; port->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD | MAC_2500FD; @@ -1223,6 +1224,8 @@ static int lan966x_probe(struct platform_device *pdev) if (err) goto cleanup_fdma; + lan966x_dcb_init(lan966x); + return 0; cleanup_fdma: diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index c977c70abc3d..27f272831ea5 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -101,6 +101,25 @@ #define LAN966X_VCAP_CID_IS2_L1 VCAP_CID_INGRESS_STAGE2_L1 /* IS2 lookup 1 */ #define LAN966X_VCAP_CID_IS2_MAX (VCAP_CID_INGRESS_STAGE2_L2 - 1) /* IS2 Max */ +#define LAN966X_VCAP_CID_ES0_L0 VCAP_CID_EGRESS_L0 /* ES0 lookup 0 */ +#define LAN966X_VCAP_CID_ES0_MAX (VCAP_CID_EGRESS_L1 - 1) /* ES0 Max */ + +#define LAN966X_PORT_QOS_PCP_COUNT 8 +#define LAN966X_PORT_QOS_DEI_COUNT 8 +#define LAN966X_PORT_QOS_PCP_DEI_COUNT \ + (LAN966X_PORT_QOS_PCP_COUNT + LAN966X_PORT_QOS_DEI_COUNT) + +#define LAN966X_PORT_QOS_DSCP_COUNT 64 + +/* Port PCP rewrite mode */ +#define LAN966X_PORT_REW_TAG_CTRL_CLASSIFIED 0 +#define LAN966X_PORT_REW_TAG_CTRL_MAPPED 2 + +/* Port DSCP rewrite mode */ +#define LAN966X_PORT_REW_DSCP_FRAME 0 +#define LAN966X_PORT_REW_DSCP_ANALIZER 1 +#define LAN966X_PORT_QOS_REWR_DSCP_ALL 3 + /* MAC table entry types. * ENTRYTYPE_NORMAL is subject to aging. * ENTRYTYPE_LOCKED is not subject to aging. @@ -389,6 +408,34 @@ struct lan966x_port_tc { struct flow_stats mirror_stat; }; +struct lan966x_port_qos_pcp { + u8 map[LAN966X_PORT_QOS_PCP_DEI_COUNT]; + bool enable; +}; + +struct lan966x_port_qos_dscp { + u8 map[LAN966X_PORT_QOS_DSCP_COUNT]; + bool enable; +}; + +struct lan966x_port_qos_pcp_rewr { + u16 map[NUM_PRIO_QUEUES]; + bool enable; +}; + +struct lan966x_port_qos_dscp_rewr { + u16 map[LAN966X_PORT_QOS_DSCP_COUNT]; + bool enable; +}; + +struct lan966x_port_qos { + struct lan966x_port_qos_pcp pcp; + struct lan966x_port_qos_dscp dscp; + struct lan966x_port_qos_pcp_rewr pcp_rewr; + struct lan966x_port_qos_dscp_rewr dscp_rewr; + u8 default_prio; +}; + struct lan966x_port { struct net_device *dev; struct lan966x *lan966x; @@ -453,6 +500,11 @@ int lan966x_port_pcs_set(struct lan966x_port *port, struct lan966x_port_config *config); void lan966x_port_init(struct lan966x_port *port); +void lan966x_port_qos_set(struct lan966x_port *port, + struct lan966x_port_qos *qos); +void lan966x_port_qos_dscp_rewr_mode_set(struct lan966x_port *port, + int mode); + int lan966x_mac_ip_learn(struct lan966x *lan966x, bool cpu_copy, const unsigned char mac[ETH_ALEN], @@ -677,6 +729,14 @@ int lan966x_goto_port_del(struct lan966x_port *port, unsigned long goto_id, struct netlink_ext_ack *extack); +#ifdef CONFIG_LAN966X_DCB +void lan966x_dcb_init(struct lan966x *lan966x); +#else +static inline void lan966x_dcb_init(struct lan966x *lan966x) +{ +} +#endif + static inline void __iomem *lan_addr(void __iomem *base[], int id, int tinst, int tcnt, int gbase, int ginst, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c b/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c index c5f9803e6e63..1d63903f9006 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c @@ -95,8 +95,7 @@ static void lan966x_pcs_get_state(struct phylink_pcs *pcs, lan966x_port_status_get(port, state); } -static int lan966x_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, +static int lan966x_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -107,8 +106,8 @@ static int lan966x_pcs_config(struct phylink_pcs *pcs, config = port->config; config.portmode = interface; - config.inband = phylink_autoneg_inband(mode); - config.autoneg = phylink_test(advertising, Autoneg); + config.inband = neg_mode & PHYLINK_PCS_NEG_INBAND; + config.autoneg = neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED; config.advertising = advertising; ret = lan966x_port_pcs_set(port, &config); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 0050fcb988b7..92108d354051 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -394,6 +394,155 @@ int lan966x_port_pcs_set(struct lan966x_port *port, return 0; } +static void lan966x_port_qos_pcp_set(struct lan966x_port *port, + struct lan966x_port_qos_pcp *qos) +{ + u8 *pcp_itr = qos->map; + u8 pcp, dp; + + lan_rmw(ANA_QOS_CFG_QOS_PCP_ENA_SET(qos->enable), + ANA_QOS_CFG_QOS_PCP_ENA, + port->lan966x, ANA_QOS_CFG(port->chip_port)); + + /* Map PCP and DEI to priority */ + for (int i = 0; i < ARRAY_SIZE(qos->map); i++) { + pcp = *(pcp_itr + i); + dp = (i < LAN966X_PORT_QOS_PCP_COUNT) ? 0 : 1; + + lan_rmw(ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL_SET(pcp) | + ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL_SET(dp), + ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL | + ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL, + port->lan966x, + ANA_PCP_DEI_CFG(port->chip_port, i)); + } +} + +static void lan966x_port_qos_dscp_set(struct lan966x_port *port, + struct lan966x_port_qos_dscp *qos) +{ + struct lan966x *lan966x = port->lan966x; + + /* Enable/disable dscp for qos classification. */ + lan_rmw(ANA_QOS_CFG_QOS_DSCP_ENA_SET(qos->enable), + ANA_QOS_CFG_QOS_DSCP_ENA, + lan966x, ANA_QOS_CFG(port->chip_port)); + + /* Map each dscp value to priority and dp */ + for (int i = 0; i < ARRAY_SIZE(qos->map); i++) + lan_rmw(ANA_DSCP_CFG_DP_DSCP_VAL_SET(0) | + ANA_DSCP_CFG_QOS_DSCP_VAL_SET(*(qos->map + i)), + ANA_DSCP_CFG_DP_DSCP_VAL | + ANA_DSCP_CFG_QOS_DSCP_VAL, + lan966x, ANA_DSCP_CFG(i)); + + /* Set per-dscp trust */ + for (int i = 0; i < ARRAY_SIZE(qos->map); i++) + lan_rmw(ANA_DSCP_CFG_DSCP_TRUST_ENA_SET(qos->enable), + ANA_DSCP_CFG_DSCP_TRUST_ENA, + lan966x, ANA_DSCP_CFG(i)); +} + +static int lan966x_port_qos_default_set(struct lan966x_port *port, + struct lan966x_port_qos *qos) +{ + /* Set default prio and dp level */ + lan_rmw(ANA_QOS_CFG_DP_DEFAULT_VAL_SET(0) | + ANA_QOS_CFG_QOS_DEFAULT_VAL_SET(qos->default_prio), + ANA_QOS_CFG_DP_DEFAULT_VAL | + ANA_QOS_CFG_QOS_DEFAULT_VAL, + port->lan966x, ANA_QOS_CFG(port->chip_port)); + + /* Set default pcp and dei for untagged frames */ + lan_rmw(ANA_VLAN_CFG_VLAN_DEI_SET(0) | + ANA_VLAN_CFG_VLAN_PCP_SET(0), + ANA_VLAN_CFG_VLAN_DEI | + ANA_VLAN_CFG_VLAN_PCP, + port->lan966x, ANA_VLAN_CFG(port->chip_port)); + + return 0; +} + +static void lan966x_port_qos_pcp_rewr_set(struct lan966x_port *port, + struct lan966x_port_qos_pcp_rewr *qos) +{ + u8 mode = LAN966X_PORT_REW_TAG_CTRL_CLASSIFIED; + u8 pcp, dei; + + if (qos->enable) + mode = LAN966X_PORT_REW_TAG_CTRL_MAPPED; + + /* Map the values only if it is enabled otherwise will be the classified + * value + */ + lan_rmw(REW_TAG_CFG_TAG_PCP_CFG_SET(mode) | + REW_TAG_CFG_TAG_DEI_CFG_SET(mode), + REW_TAG_CFG_TAG_PCP_CFG | + REW_TAG_CFG_TAG_DEI_CFG, + port->lan966x, REW_TAG_CFG(port->chip_port)); + + /* Map each value to pcp and dei */ + for (int i = 0; i < ARRAY_SIZE(qos->map); i++) { + pcp = qos->map[i]; + if (pcp > LAN966X_PORT_QOS_PCP_COUNT) + dei = 1; + else + dei = 0; + + lan_rmw(REW_PCP_DEI_CFG_DEI_QOS_VAL_SET(dei) | + REW_PCP_DEI_CFG_PCP_QOS_VAL_SET(pcp), + REW_PCP_DEI_CFG_DEI_QOS_VAL | + REW_PCP_DEI_CFG_PCP_QOS_VAL, + port->lan966x, + REW_PCP_DEI_CFG(port->chip_port, + i + dei * LAN966X_PORT_QOS_PCP_COUNT)); + } +} + +static void lan966x_port_qos_dscp_rewr_set(struct lan966x_port *port, + struct lan966x_port_qos_dscp_rewr *qos) +{ + u16 dscp; + u8 mode; + + if (qos->enable) + mode = LAN966X_PORT_REW_DSCP_ANALIZER; + else + mode = LAN966X_PORT_REW_DSCP_FRAME; + + /* Enable the rewrite otherwise will use the values from the frame */ + lan_rmw(REW_DSCP_CFG_DSCP_REWR_CFG_SET(mode), + REW_DSCP_CFG_DSCP_REWR_CFG, + port->lan966x, REW_DSCP_CFG(port->chip_port)); + + /* Map each classified Qos class and DP to classified DSCP value */ + for (int i = 0; i < ARRAY_SIZE(qos->map); i++) { + dscp = qos->map[i]; + + lan_rmw(ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL_SET(dscp), + ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL, + port->lan966x, ANA_DSCP_REWR_CFG(i)); + } +} + +void lan966x_port_qos_dscp_rewr_mode_set(struct lan966x_port *port, + int mode) +{ + lan_rmw(ANA_QOS_CFG_DSCP_REWR_CFG_SET(mode), + ANA_QOS_CFG_DSCP_REWR_CFG, + port->lan966x, ANA_QOS_CFG(port->chip_port)); +} + +void lan966x_port_qos_set(struct lan966x_port *port, + struct lan966x_port_qos *qos) +{ + lan966x_port_qos_pcp_set(port, &qos->pcp); + lan966x_port_qos_dscp_set(port, &qos->dscp); + lan966x_port_qos_default_set(port, qos); + lan966x_port_qos_pcp_rewr_set(port, &qos->pcp_rewr); + lan966x_port_qos_dscp_rewr_set(port, &qos->dscp_rewr); +} + void lan966x_port_init(struct lan966x_port *port) { struct lan966x_port_config *config = &port->config; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c index 266a21a2d124..1da2b1f82ae9 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -59,7 +59,7 @@ static int lan966x_ptp_add_trap(struct lan966x_port *port, int err; vrule = vcap_get_rule(lan966x->vcap_ctrl, rule_id); - if (vrule) { + if (!IS_ERR(vrule)) { u32 value, mask; /* Just modify the ingress port mask and exit */ @@ -106,7 +106,7 @@ static int lan966x_ptp_del_trap(struct lan966x_port *port, int err; vrule = vcap_get_rule(lan966x->vcap_ctrl, rule_id); - if (!vrule) + if (IS_ERR(vrule)) return -EEXIST; vcap_rule_get_key_u32(vrule, VCAP_KF_IF_IGR_PORT_MASK, &value, &mask); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h index f99f88b5caa8..4b553927d2e0 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h @@ -283,6 +283,18 @@ enum lan966x_target { #define ANA_VLAN_CFG_VLAN_POP_CNT_GET(x)\ FIELD_GET(ANA_VLAN_CFG_VLAN_POP_CNT, x) +#define ANA_VLAN_CFG_VLAN_PCP GENMASK(15, 13) +#define ANA_VLAN_CFG_VLAN_PCP_SET(x)\ + FIELD_PREP(ANA_VLAN_CFG_VLAN_PCP, x) +#define ANA_VLAN_CFG_VLAN_PCP_GET(x)\ + FIELD_GET(ANA_VLAN_CFG_VLAN_PCP, x) + +#define ANA_VLAN_CFG_VLAN_DEI BIT(12) +#define ANA_VLAN_CFG_VLAN_DEI_SET(x)\ + FIELD_PREP(ANA_VLAN_CFG_VLAN_DEI, x) +#define ANA_VLAN_CFG_VLAN_DEI_GET(x)\ + FIELD_GET(ANA_VLAN_CFG_VLAN_DEI, x) + #define ANA_VLAN_CFG_VLAN_VID GENMASK(11, 0) #define ANA_VLAN_CFG_VLAN_VID_SET(x)\ FIELD_PREP(ANA_VLAN_CFG_VLAN_VID, x) @@ -316,6 +328,39 @@ enum lan966x_target { #define ANA_DROP_CFG_DROP_MC_SMAC_ENA_GET(x)\ FIELD_GET(ANA_DROP_CFG_DROP_MC_SMAC_ENA, x) +/* ANA:PORT:QOS_CFG */ +#define ANA_QOS_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 8, 0, 1, 4) + +#define ANA_QOS_CFG_DP_DEFAULT_VAL BIT(8) +#define ANA_QOS_CFG_DP_DEFAULT_VAL_SET(x)\ + FIELD_PREP(ANA_QOS_CFG_DP_DEFAULT_VAL, x) +#define ANA_QOS_CFG_DP_DEFAULT_VAL_GET(x)\ + FIELD_GET(ANA_QOS_CFG_DP_DEFAULT_VAL, x) + +#define ANA_QOS_CFG_QOS_DEFAULT_VAL GENMASK(7, 5) +#define ANA_QOS_CFG_QOS_DEFAULT_VAL_SET(x)\ + FIELD_PREP(ANA_QOS_CFG_QOS_DEFAULT_VAL, x) +#define ANA_QOS_CFG_QOS_DEFAULT_VAL_GET(x)\ + FIELD_GET(ANA_QOS_CFG_QOS_DEFAULT_VAL, x) + +#define ANA_QOS_CFG_QOS_DSCP_ENA BIT(4) +#define ANA_QOS_CFG_QOS_DSCP_ENA_SET(x)\ + FIELD_PREP(ANA_QOS_CFG_QOS_DSCP_ENA, x) +#define ANA_QOS_CFG_QOS_DSCP_ENA_GET(x)\ + FIELD_GET(ANA_QOS_CFG_QOS_DSCP_ENA, x) + +#define ANA_QOS_CFG_QOS_PCP_ENA BIT(3) +#define ANA_QOS_CFG_QOS_PCP_ENA_SET(x)\ + FIELD_PREP(ANA_QOS_CFG_QOS_PCP_ENA, x) +#define ANA_QOS_CFG_QOS_PCP_ENA_GET(x)\ + FIELD_GET(ANA_QOS_CFG_QOS_PCP_ENA, x) + +#define ANA_QOS_CFG_DSCP_REWR_CFG GENMASK(1, 0) +#define ANA_QOS_CFG_DSCP_REWR_CFG_SET(x)\ + FIELD_PREP(ANA_QOS_CFG_DSCP_REWR_CFG, x) +#define ANA_QOS_CFG_DSCP_REWR_CFG_GET(x)\ + FIELD_GET(ANA_QOS_CFG_DSCP_REWR_CFG, x) + /* ANA:PORT:VCAP_CFG */ #define ANA_VCAP_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 12, 0, 1, 4) @@ -415,6 +460,21 @@ enum lan966x_target { #define ANA_VCAP_S2_CFG_OAM_DIS_GET(x)\ FIELD_GET(ANA_VCAP_S2_CFG_OAM_DIS, x) +/* ANA:PORT:QOS_PCP_DEI_MAP_CFG */ +#define ANA_PCP_DEI_CFG(g, r) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 32, r, 16, 4) + +#define ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL BIT(3) +#define ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL_SET(x)\ + FIELD_PREP(ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL, x) +#define ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL_GET(x)\ + FIELD_GET(ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL, x) + +#define ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL GENMASK(2, 0) +#define ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL_SET(x)\ + FIELD_PREP(ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL, x) +#define ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL_GET(x)\ + FIELD_GET(ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL, x) + /* ANA:PORT:CPU_FWD_CFG */ #define ANA_CPU_FWD_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 96, 0, 1, 4) @@ -478,6 +538,15 @@ enum lan966x_target { #define ANA_PORT_CFG_PORTID_VAL_GET(x)\ FIELD_GET(ANA_PORT_CFG_PORTID_VAL, x) +/* ANA:COMMON:DSCP_REWR_CFG */ +#define ANA_DSCP_REWR_CFG(r) __REG(TARGET_ANA, 0, 1, 31232, 0, 1, 552, 332, r, 16, 4) + +#define ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL GENMASK(5, 0) +#define ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL_SET(x)\ + FIELD_PREP(ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL, x) +#define ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL_GET(x)\ + FIELD_GET(ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL, x) + /* ANA:PORT:POL_CFG */ #define ANA_POL_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 116, 0, 1, 4) @@ -547,6 +616,33 @@ enum lan966x_target { #define ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA_GET(x)\ FIELD_GET(ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA, x) +/* ANA:COMMON:DSCP_CFG */ +#define ANA_DSCP_CFG(r) __REG(TARGET_ANA, 0, 1, 31232, 0, 1, 552, 76, r, 64, 4) + +#define ANA_DSCP_CFG_DP_DSCP_VAL BIT(11) +#define ANA_DSCP_CFG_DP_DSCP_VAL_SET(x)\ + FIELD_PREP(ANA_DSCP_CFG_DP_DSCP_VAL, x) +#define ANA_DSCP_CFG_DP_DSCP_VAL_GET(x)\ + FIELD_GET(ANA_DSCP_CFG_DP_DSCP_VAL, x) + +#define ANA_DSCP_CFG_QOS_DSCP_VAL GENMASK(10, 8) +#define ANA_DSCP_CFG_QOS_DSCP_VAL_SET(x)\ + FIELD_PREP(ANA_DSCP_CFG_QOS_DSCP_VAL, x) +#define ANA_DSCP_CFG_QOS_DSCP_VAL_GET(x)\ + FIELD_GET(ANA_DSCP_CFG_QOS_DSCP_VAL, x) + +#define ANA_DSCP_CFG_DSCP_TRUST_ENA BIT(1) +#define ANA_DSCP_CFG_DSCP_TRUST_ENA_SET(x)\ + FIELD_PREP(ANA_DSCP_CFG_DSCP_TRUST_ENA, x) +#define ANA_DSCP_CFG_DSCP_TRUST_ENA_GET(x)\ + FIELD_GET(ANA_DSCP_CFG_DSCP_TRUST_ENA, x) + +#define ANA_DSCP_CFG_DSCP_REWR_ENA BIT(0) +#define ANA_DSCP_CFG_DSCP_REWR_ENA_SET(x)\ + FIELD_PREP(ANA_DSCP_CFG_DSCP_REWR_ENA, x) +#define ANA_DSCP_CFG_DSCP_REWR_ENA_GET(x)\ + FIELD_GET(ANA_DSCP_CFG_DSCP_REWR_ENA, x) + /* ANA:POL:POL_PIR_CFG */ #define ANA_POL_PIR_CFG(g) __REG(TARGET_ANA, 0, 1, 16384, g, 345, 32, 0, 0, 1, 4) @@ -1468,15 +1564,66 @@ enum lan966x_target { #define REW_TAG_CFG_TAG_TPID_CFG_GET(x)\ FIELD_GET(REW_TAG_CFG_TAG_TPID_CFG, x) +#define REW_TAG_CFG_TAG_PCP_CFG GENMASK(3, 2) +#define REW_TAG_CFG_TAG_PCP_CFG_SET(x)\ + FIELD_PREP(REW_TAG_CFG_TAG_PCP_CFG, x) +#define REW_TAG_CFG_TAG_PCP_CFG_GET(x)\ + FIELD_GET(REW_TAG_CFG_TAG_PCP_CFG, x) + +#define REW_TAG_CFG_TAG_DEI_CFG GENMASK(1, 0) +#define REW_TAG_CFG_TAG_DEI_CFG_SET(x)\ + FIELD_PREP(REW_TAG_CFG_TAG_DEI_CFG, x) +#define REW_TAG_CFG_TAG_DEI_CFG_GET(x)\ + FIELD_GET(REW_TAG_CFG_TAG_DEI_CFG, x) + /* REW:PORT:PORT_CFG */ #define REW_PORT_CFG(g) __REG(TARGET_REW, 0, 1, 0, g, 10, 128, 8, 0, 1, 4) +#define REW_PORT_CFG_ES0_EN BIT(4) +#define REW_PORT_CFG_ES0_EN_SET(x)\ + FIELD_PREP(REW_PORT_CFG_ES0_EN, x) +#define REW_PORT_CFG_ES0_EN_GET(x)\ + FIELD_GET(REW_PORT_CFG_ES0_EN, x) + #define REW_PORT_CFG_NO_REWRITE BIT(0) #define REW_PORT_CFG_NO_REWRITE_SET(x)\ FIELD_PREP(REW_PORT_CFG_NO_REWRITE, x) #define REW_PORT_CFG_NO_REWRITE_GET(x)\ FIELD_GET(REW_PORT_CFG_NO_REWRITE, x) +/* REW:PORT:DSCP_CFG */ +#define REW_DSCP_CFG(g) __REG(TARGET_REW, 0, 1, 0, g, 10, 128, 12, 0, 1, 4) + +#define REW_DSCP_CFG_DSCP_REWR_CFG GENMASK(1, 0) +#define REW_DSCP_CFG_DSCP_REWR_CFG_SET(x)\ + FIELD_PREP(REW_DSCP_CFG_DSCP_REWR_CFG, x) +#define REW_DSCP_CFG_DSCP_REWR_CFG_GET(x)\ + FIELD_GET(REW_DSCP_CFG_DSCP_REWR_CFG, x) + +/* REW:PORT:PCP_DEI_QOS_MAP_CFG */ +#define REW_PCP_DEI_CFG(g, r) __REG(TARGET_REW, 0, 1, 0, g, 10, 128, 16, r, 16, 4) + +#define REW_PCP_DEI_CFG_DEI_QOS_VAL BIT(3) +#define REW_PCP_DEI_CFG_DEI_QOS_VAL_SET(x)\ + FIELD_PREP(REW_PCP_DEI_CFG_DEI_QOS_VAL, x) +#define REW_PCP_DEI_CFG_DEI_QOS_VAL_GET(x)\ + FIELD_GET(REW_PCP_DEI_CFG_DEI_QOS_VAL, x) + +#define REW_PCP_DEI_CFG_PCP_QOS_VAL GENMASK(2, 0) +#define REW_PCP_DEI_CFG_PCP_QOS_VAL_SET(x)\ + FIELD_PREP(REW_PCP_DEI_CFG_PCP_QOS_VAL, x) +#define REW_PCP_DEI_CFG_PCP_QOS_VAL_GET(x)\ + FIELD_GET(REW_PCP_DEI_CFG_PCP_QOS_VAL, x) + +/* REW:COMMON:STAT_CFG */ +#define REW_STAT_CFG __REG(TARGET_REW, 0, 1, 3072, 0, 1, 528, 520, 0, 1, 4) + +#define REW_STAT_CFG_STAT_MODE GENMASK(1, 0) +#define REW_STAT_CFG_STAT_MODE_SET(x)\ + FIELD_PREP(REW_STAT_CFG_STAT_MODE, x) +#define REW_STAT_CFG_STAT_MODE_GET(x)\ + FIELD_GET(REW_STAT_CFG_STAT_MODE, x) + /* SYS:SYSTEM:RESET_CFG */ #define SYS_RESET_CFG __REG(TARGET_SYS, 0, 1, 4128, 0, 1, 168, 0, 0, 1, 4) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c index cf0cc7562d04..ee652f2d2359 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c @@ -21,8 +21,14 @@ static int lan966x_tc_setup_qdisc_mqprio(struct lan966x_port *port, static int lan966x_tc_setup_qdisc_taprio(struct lan966x_port *port, struct tc_taprio_qopt_offload *taprio) { - return taprio->enable ? lan966x_taprio_add(port, taprio) : - lan966x_taprio_del(port); + switch (taprio->cmd) { + case TAPRIO_CMD_REPLACE: + return lan966x_taprio_add(port, taprio); + case TAPRIO_CMD_DESTROY: + return lan966x_taprio_del(port); + default: + return -EOPNOTSUPP; + } } static int lan966x_tc_setup_qdisc_tbf(struct lan966x_port *port, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c index 47b2f7579dd2..96b3def6c474 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c @@ -5,6 +5,8 @@ #include "vcap_api_client.h" #include "vcap_tc.h" +#define LAN966X_FORCE_UNTAGED 3 + static bool lan966x_tc_is_known_etype(struct vcap_tc_flower_parse_usage *st, u16 etype) { @@ -29,6 +31,8 @@ static bool lan966x_tc_is_known_etype(struct vcap_tc_flower_parse_usage *st, return true; } break; + case VCAP_TYPE_ES0: + return true; default: NL_SET_ERR_MSG_MOD(st->fco->common.extack, "VCAP type not supported"); @@ -318,6 +322,9 @@ static int lan966x_tc_set_actionset(struct vcap_admin *admin, case VCAP_TYPE_IS2: aset = VCAP_AFS_BASE_TYPE; break; + case VCAP_TYPE_ES0: + aset = VCAP_AFS_VID; + break; default: return -EINVAL; } @@ -353,6 +360,10 @@ static int lan966x_tc_add_rule_link_target(struct vcap_admin *admin, /* Add IS2 specific PAG key (for chaining rules from IS1) */ return vcap_rule_add_key_u32(vrule, VCAP_KF_LOOKUP_PAG, link_val, ~0); + case VCAP_TYPE_ES0: + /* Add ES0 specific ISDX key (for chaining rules from IS1) */ + return vcap_rule_add_key_u32(vrule, VCAP_KF_ISDX_CLS, + link_val, ~0); default: break; } @@ -389,6 +400,18 @@ static int lan966x_tc_add_rule_link(struct vcap_control *vctrl, 0xff); if (err) return err; + } else if (admin->vtype == VCAP_TYPE_IS1 && + to_admin->vtype == VCAP_TYPE_ES0) { + /* This works for IS1->ES0 */ + err = vcap_rule_add_action_u32(vrule, VCAP_AF_ISDX_ADD_VAL, + diff); + if (err) + return err; + + err = vcap_rule_add_action_bit(vrule, VCAP_AF_ISDX_REPLACE_ENA, + VCAP_BIT_1); + if (err) + return err; } else { NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported chain destination"); @@ -398,6 +421,23 @@ static int lan966x_tc_add_rule_link(struct vcap_control *vctrl, return err; } +static int lan966x_tc_add_rule_counter(struct vcap_admin *admin, + struct vcap_rule *vrule) +{ + int err = 0; + + switch (admin->vtype) { + case VCAP_TYPE_ES0: + err = vcap_rule_mod_action_u32(vrule, VCAP_AF_ESDX, + vrule->id); + break; + default: + break; + } + + return err; +} + static int lan966x_tc_flower_add(struct lan966x_port *port, struct flow_cls_offload *f, struct vcap_admin *admin, @@ -466,6 +506,21 @@ static int lan966x_tc_flower_add(struct lan966x_port *port, goto out; break; + case FLOW_ACTION_VLAN_POP: + if (admin->vtype != VCAP_TYPE_ES0) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "Cannot use vlan pop on non es0"); + err = -EOPNOTSUPP; + goto out; + } + + /* Force untag */ + err = vcap_rule_add_action_u32(vrule, VCAP_AF_PUSH_OUTER_TAG, + LAN966X_FORCE_UNTAGED); + if (err) + goto out; + + break; default: NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported TC action"); @@ -474,6 +529,12 @@ static int lan966x_tc_flower_add(struct lan966x_port *port, } } + err = lan966x_tc_add_rule_counter(admin, vrule); + if (err) { + vcap_set_tc_exterr(f, vrule); + goto out; + } + err = vcap_val_rule(vrule, l3_proto); if (err) { vcap_set_tc_exterr(f, vrule); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c index 66400a082d02..fb6851b94528 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c @@ -2121,6 +2121,69 @@ static const struct vcap_field is2_smac_sip6_keyfield[] = { }, }; +static const struct vcap_field es0_vid_keyfield[] = { + [VCAP_KF_IF_EGR_PORT_NO] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 4, + }, + [VCAP_KF_IF_IGR_PORT] = { + .type = VCAP_FIELD_U32, + .offset = 4, + .width = 4, + }, + [VCAP_KF_ISDX_GT0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 8, + .width = 1, + }, + [VCAP_KF_ISDX_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 9, + .width = 8, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 17, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 18, + .width = 1, + }, + [VCAP_KF_8021Q_VID_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 19, + .width = 12, + }, + [VCAP_KF_8021Q_DEI_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 31, + .width = 1, + }, + [VCAP_KF_8021Q_PCP_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 32, + .width = 3, + }, + [VCAP_KF_L3_DPL_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 35, + .width = 1, + }, + [VCAP_KF_RTP_ID] = { + .type = VCAP_FIELD_U32, + .offset = 36, + .width = 10, + }, + [VCAP_KF_PDU_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 46, + .width = 4, + }, +}; + /* keyfield_set */ static const struct vcap_set is1_keyfield_set[] = { [VCAP_KFS_NORMAL] = { @@ -2228,6 +2291,14 @@ static const struct vcap_set is2_keyfield_set[] = { }, }; +static const struct vcap_set es0_keyfield_set[] = { + [VCAP_KFS_VID] = { + .type_id = -1, + .sw_per_item = 1, + .sw_cnt = 1, + }, +}; + /* keyfield_set map */ static const struct vcap_field *is1_keyfield_set_map[] = { [VCAP_KFS_NORMAL] = is1_normal_keyfield, @@ -2255,6 +2326,10 @@ static const struct vcap_field *is2_keyfield_set_map[] = { [VCAP_KFS_SMAC_SIP6] = is2_smac_sip6_keyfield, }; +static const struct vcap_field *es0_keyfield_set_map[] = { + [VCAP_KFS_VID] = es0_vid_keyfield, +}; + /* keyfield_set map sizes */ static int is1_keyfield_set_map_size[] = { [VCAP_KFS_NORMAL] = ARRAY_SIZE(is1_normal_keyfield), @@ -2282,6 +2357,10 @@ static int is2_keyfield_set_map_size[] = { [VCAP_KFS_SMAC_SIP6] = ARRAY_SIZE(is2_smac_sip6_keyfield), }; +static int es0_keyfield_set_map_size[] = { + [VCAP_KFS_VID] = ARRAY_SIZE(es0_vid_keyfield), +}; + /* actionfields */ static const struct vcap_field is1_s1_actionfield[] = { [VCAP_AF_TYPE] = { @@ -2522,6 +2601,94 @@ static const struct vcap_field is2_smac_sip_actionfield[] = { }, }; +static const struct vcap_field es0_vid_actionfield[] = { + [VCAP_AF_PUSH_OUTER_TAG] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 2, + }, + [VCAP_AF_PUSH_INNER_TAG] = { + .type = VCAP_FIELD_BIT, + .offset = 2, + .width = 1, + }, + [VCAP_AF_TAG_A_TPID_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 3, + .width = 2, + }, + [VCAP_AF_TAG_A_VID_SEL] = { + .type = VCAP_FIELD_BIT, + .offset = 5, + .width = 1, + }, + [VCAP_AF_TAG_A_PCP_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 6, + .width = 2, + }, + [VCAP_AF_TAG_A_DEI_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 8, + .width = 2, + }, + [VCAP_AF_TAG_B_TPID_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 10, + .width = 2, + }, + [VCAP_AF_TAG_B_VID_SEL] = { + .type = VCAP_FIELD_BIT, + .offset = 12, + .width = 1, + }, + [VCAP_AF_TAG_B_PCP_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 13, + .width = 2, + }, + [VCAP_AF_TAG_B_DEI_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 15, + .width = 2, + }, + [VCAP_AF_VID_A_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 17, + .width = 12, + }, + [VCAP_AF_PCP_A_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 29, + .width = 3, + }, + [VCAP_AF_DEI_A_VAL] = { + .type = VCAP_FIELD_BIT, + .offset = 32, + .width = 1, + }, + [VCAP_AF_VID_B_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 33, + .width = 12, + }, + [VCAP_AF_PCP_B_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 45, + .width = 3, + }, + [VCAP_AF_DEI_B_VAL] = { + .type = VCAP_FIELD_BIT, + .offset = 48, + .width = 1, + }, + [VCAP_AF_ESDX] = { + .type = VCAP_FIELD_U32, + .offset = 49, + .width = 8, + }, +}; + /* actionfield_set */ static const struct vcap_set is1_actionfield_set[] = { [VCAP_AFS_S1] = { @@ -2544,6 +2711,14 @@ static const struct vcap_set is2_actionfield_set[] = { }, }; +static const struct vcap_set es0_actionfield_set[] = { + [VCAP_AFS_VID] = { + .type_id = -1, + .sw_per_item = 1, + .sw_cnt = 1, + }, +}; + /* actionfield_set map */ static const struct vcap_field *is1_actionfield_set_map[] = { [VCAP_AFS_S1] = is1_s1_actionfield, @@ -2554,6 +2729,10 @@ static const struct vcap_field *is2_actionfield_set_map[] = { [VCAP_AFS_SMAC_SIP] = is2_smac_sip_actionfield, }; +static const struct vcap_field *es0_actionfield_set_map[] = { + [VCAP_AFS_VID] = es0_vid_actionfield, +}; + /* actionfield_set map size */ static int is1_actionfield_set_map_size[] = { [VCAP_AFS_S1] = ARRAY_SIZE(is1_s1_actionfield), @@ -2564,6 +2743,10 @@ static int is2_actionfield_set_map_size[] = { [VCAP_AFS_SMAC_SIP] = ARRAY_SIZE(is2_smac_sip_actionfield), }; +static int es0_actionfield_set_map_size[] = { + [VCAP_AFS_VID] = ARRAY_SIZE(es0_vid_actionfield), +}; + /* Type Groups */ static const struct vcap_typegroup is1_x4_keyfield_set_typegroups[] = { { @@ -2659,6 +2842,10 @@ static const struct vcap_typegroup is2_x1_keyfield_set_typegroups[] = { {} }; +static const struct vcap_typegroup es0_x1_keyfield_set_typegroups[] = { + {} +}; + static const struct vcap_typegroup *is1_keyfield_set_typegroups[] = { [4] = is1_x4_keyfield_set_typegroups, [2] = is1_x2_keyfield_set_typegroups, @@ -2673,6 +2860,11 @@ static const struct vcap_typegroup *is2_keyfield_set_typegroups[] = { [5] = NULL, }; +static const struct vcap_typegroup *es0_keyfield_set_typegroups[] = { + [1] = es0_x1_keyfield_set_typegroups, + [2] = NULL, +}; + static const struct vcap_typegroup is1_x1_actionfield_set_typegroups[] = { {} }; @@ -2700,6 +2892,10 @@ static const struct vcap_typegroup is2_x1_actionfield_set_typegroups[] = { {} }; +static const struct vcap_typegroup es0_x1_actionfield_set_typegroups[] = { + {} +}; + static const struct vcap_typegroup *is1_actionfield_set_typegroups[] = { [1] = is1_x1_actionfield_set_typegroups, [5] = NULL, @@ -2711,6 +2907,11 @@ static const struct vcap_typegroup *is2_actionfield_set_typegroups[] = { [5] = NULL, }; +static const struct vcap_typegroup *es0_actionfield_set_typegroups[] = { + [1] = es0_x1_actionfield_set_typegroups, + [2] = NULL, +}; + /* Keyfieldset names */ static const char * const vcap_keyfield_set_names[] = { [VCAP_KFS_NO_VALUE] = "(None)", @@ -2743,6 +2944,7 @@ static const char * const vcap_keyfield_set_names[] = { [VCAP_KFS_RT] = "VCAP_KFS_RT", [VCAP_KFS_SMAC_SIP4] = "VCAP_KFS_SMAC_SIP4", [VCAP_KFS_SMAC_SIP6] = "VCAP_KFS_SMAC_SIP6", + [VCAP_KFS_VID] = "VCAP_KFS_VID", }; /* Actionfieldset names */ @@ -2751,9 +2953,11 @@ static const char * const vcap_actionfield_set_names[] = { [VCAP_AFS_BASE_TYPE] = "VCAP_AFS_BASE_TYPE", [VCAP_AFS_CLASSIFICATION] = "VCAP_AFS_CLASSIFICATION", [VCAP_AFS_CLASS_REDUCED] = "VCAP_AFS_CLASS_REDUCED", + [VCAP_AFS_ES0] = "VCAP_AFS_ES0", [VCAP_AFS_FULL] = "VCAP_AFS_FULL", [VCAP_AFS_S1] = "VCAP_AFS_S1", [VCAP_AFS_SMAC_SIP] = "VCAP_AFS_SMAC_SIP", + [VCAP_AFS_VID] = "VCAP_AFS_VID", }; /* Keyfield names */ @@ -2774,6 +2978,7 @@ static const char * const vcap_keyfield_names[] = { [VCAP_KF_8021Q_PCP1] = "8021Q_PCP1", [VCAP_KF_8021Q_PCP2] = "8021Q_PCP2", [VCAP_KF_8021Q_PCP_CLS] = "8021Q_PCP_CLS", + [VCAP_KF_8021Q_TPID] = "8021Q_TPID", [VCAP_KF_8021Q_TPID0] = "8021Q_TPID0", [VCAP_KF_8021Q_TPID1] = "8021Q_TPID1", [VCAP_KF_8021Q_TPID2] = "8021Q_TPID2", @@ -2799,6 +3004,7 @@ static const char * const vcap_keyfield_names[] = { [VCAP_KF_HOST_MATCH] = "HOST_MATCH", [VCAP_KF_IF_EGR_PORT_MASK] = "IF_EGR_PORT_MASK", [VCAP_KF_IF_EGR_PORT_MASK_RNG] = "IF_EGR_PORT_MASK_RNG", + [VCAP_KF_IF_EGR_PORT_NO] = "IF_EGR_PORT_NO", [VCAP_KF_IF_IGR_PORT] = "IF_IGR_PORT", [VCAP_KF_IF_IGR_PORT_MASK] = "IF_IGR_PORT_MASK", [VCAP_KF_IF_IGR_PORT_MASK_L3] = "IF_IGR_PORT_MASK_L3", @@ -2873,7 +3079,9 @@ static const char * const vcap_keyfield_names[] = { [VCAP_KF_OAM_OPCODE] = "OAM_OPCODE", [VCAP_KF_OAM_VER] = "OAM_VER", [VCAP_KF_OAM_Y1731_IS] = "OAM_Y1731_IS", + [VCAP_KF_PDU_TYPE] = "PDU_TYPE", [VCAP_KF_PROT_ACTIVE] = "PROT_ACTIVE", + [VCAP_KF_RTP_ID] = "RTP_ID", [VCAP_KF_RT_FRMID] = "RT_FRMID", [VCAP_KF_RT_TYPE] = "RT_TYPE", [VCAP_KF_RT_VLAN_IDX] = "RT_VLAN_IDX", @@ -2891,18 +3099,25 @@ static const char * const vcap_actionfield_names[] = { [VCAP_AF_COPY_PORT_NUM] = "COPY_PORT_NUM", [VCAP_AF_COPY_QUEUE_NUM] = "COPY_QUEUE_NUM", [VCAP_AF_CPU_COPY_ENA] = "CPU_COPY_ENA", + [VCAP_AF_CPU_QU] = "CPU_QU", [VCAP_AF_CPU_QUEUE_NUM] = "CPU_QUEUE_NUM", [VCAP_AF_CUSTOM_ACE_TYPE_ENA] = "CUSTOM_ACE_TYPE_ENA", + [VCAP_AF_DEI_A_VAL] = "DEI_A_VAL", + [VCAP_AF_DEI_B_VAL] = "DEI_B_VAL", + [VCAP_AF_DEI_C_VAL] = "DEI_C_VAL", [VCAP_AF_DEI_ENA] = "DEI_ENA", [VCAP_AF_DEI_VAL] = "DEI_VAL", [VCAP_AF_DLR_SEL] = "DLR_SEL", [VCAP_AF_DP_ENA] = "DP_ENA", [VCAP_AF_DP_VAL] = "DP_VAL", [VCAP_AF_DSCP_ENA] = "DSCP_ENA", + [VCAP_AF_DSCP_SEL] = "DSCP_SEL", [VCAP_AF_DSCP_VAL] = "DSCP_VAL", [VCAP_AF_ES2_REW_CMD] = "ES2_REW_CMD", + [VCAP_AF_ESDX] = "ESDX", [VCAP_AF_FWD_KILL_ENA] = "FWD_KILL_ENA", [VCAP_AF_FWD_MODE] = "FWD_MODE", + [VCAP_AF_FWD_SEL] = "FWD_SEL", [VCAP_AF_HIT_ME_ONCE] = "HIT_ME_ONCE", [VCAP_AF_HOST_MATCH] = "HOST_MATCH", [VCAP_AF_IGNORE_PIPELINE_CTRL] = "IGNORE_PIPELINE_CTRL", @@ -2912,6 +3127,7 @@ static const char * const vcap_actionfield_names[] = { [VCAP_AF_ISDX_ENA] = "ISDX_ENA", [VCAP_AF_ISDX_REPLACE_ENA] = "ISDX_REPLACE_ENA", [VCAP_AF_ISDX_VAL] = "ISDX_VAL", + [VCAP_AF_LOOP_ENA] = "LOOP_ENA", [VCAP_AF_LRN_DIS] = "LRN_DIS", [VCAP_AF_MAP_IDX] = "MAP_IDX", [VCAP_AF_MAP_KEY] = "MAP_KEY", @@ -2928,15 +3144,23 @@ static const char * const vcap_actionfield_names[] = { [VCAP_AF_OAM_SEL] = "OAM_SEL", [VCAP_AF_PAG_OVERRIDE_MASK] = "PAG_OVERRIDE_MASK", [VCAP_AF_PAG_VAL] = "PAG_VAL", + [VCAP_AF_PCP_A_VAL] = "PCP_A_VAL", + [VCAP_AF_PCP_B_VAL] = "PCP_B_VAL", + [VCAP_AF_PCP_C_VAL] = "PCP_C_VAL", [VCAP_AF_PCP_ENA] = "PCP_ENA", [VCAP_AF_PCP_VAL] = "PCP_VAL", + [VCAP_AF_PIPELINE_ACT] = "PIPELINE_ACT", [VCAP_AF_PIPELINE_FORCE_ENA] = "PIPELINE_FORCE_ENA", [VCAP_AF_PIPELINE_PT] = "PIPELINE_PT", [VCAP_AF_POLICE_ENA] = "POLICE_ENA", [VCAP_AF_POLICE_IDX] = "POLICE_IDX", [VCAP_AF_POLICE_REMARK] = "POLICE_REMARK", [VCAP_AF_POLICE_VCAP_ONLY] = "POLICE_VCAP_ONLY", + [VCAP_AF_POP_VAL] = "POP_VAL", [VCAP_AF_PORT_MASK] = "PORT_MASK", + [VCAP_AF_PUSH_CUSTOMER_TAG] = "PUSH_CUSTOMER_TAG", + [VCAP_AF_PUSH_INNER_TAG] = "PUSH_INNER_TAG", + [VCAP_AF_PUSH_OUTER_TAG] = "PUSH_OUTER_TAG", [VCAP_AF_QOS_ENA] = "QOS_ENA", [VCAP_AF_QOS_VAL] = "QOS_VAL", [VCAP_AF_REW_OP] = "REW_OP", @@ -2945,7 +3169,24 @@ static const char * const vcap_actionfield_names[] = { [VCAP_AF_SFID_VAL] = "SFID_VAL", [VCAP_AF_SGID_ENA] = "SGID_ENA", [VCAP_AF_SGID_VAL] = "SGID_VAL", + [VCAP_AF_SWAP_MACS_ENA] = "SWAP_MACS_ENA", + [VCAP_AF_TAG_A_DEI_SEL] = "TAG_A_DEI_SEL", + [VCAP_AF_TAG_A_PCP_SEL] = "TAG_A_PCP_SEL", + [VCAP_AF_TAG_A_TPID_SEL] = "TAG_A_TPID_SEL", + [VCAP_AF_TAG_A_VID_SEL] = "TAG_A_VID_SEL", + [VCAP_AF_TAG_B_DEI_SEL] = "TAG_B_DEI_SEL", + [VCAP_AF_TAG_B_PCP_SEL] = "TAG_B_PCP_SEL", + [VCAP_AF_TAG_B_TPID_SEL] = "TAG_B_TPID_SEL", + [VCAP_AF_TAG_B_VID_SEL] = "TAG_B_VID_SEL", + [VCAP_AF_TAG_C_DEI_SEL] = "TAG_C_DEI_SEL", + [VCAP_AF_TAG_C_PCP_SEL] = "TAG_C_PCP_SEL", + [VCAP_AF_TAG_C_TPID_SEL] = "TAG_C_TPID_SEL", + [VCAP_AF_TAG_C_VID_SEL] = "TAG_C_VID_SEL", [VCAP_AF_TYPE] = "TYPE", + [VCAP_AF_UNTAG_VID_ENA] = "UNTAG_VID_ENA", + [VCAP_AF_VID_A_VAL] = "VID_A_VAL", + [VCAP_AF_VID_B_VAL] = "VID_B_VAL", + [VCAP_AF_VID_C_VAL] = "VID_C_VAL", [VCAP_AF_VID_REPLACE_ENA] = "VID_REPLACE_ENA", [VCAP_AF_VID_VAL] = "VID_VAL", [VCAP_AF_VLAN_POP_CNT] = "VLAN_POP_CNT", @@ -2996,11 +3237,32 @@ const struct vcap_info lan966x_vcaps[] = { .keyfield_set_typegroups = is2_keyfield_set_typegroups, .actionfield_set_typegroups = is2_actionfield_set_typegroups, }, + [VCAP_TYPE_ES0] = { + .name = "es0", + .rows = 256, + .sw_count = 1, + .sw_width = 96, + .sticky_width = 1, + .act_width = 65, + .default_cnt = 8, + .require_cnt_dis = 0, + .version = 1, + .keyfield_set = es0_keyfield_set, + .keyfield_set_size = ARRAY_SIZE(es0_keyfield_set), + .actionfield_set = es0_actionfield_set, + .actionfield_set_size = ARRAY_SIZE(es0_actionfield_set), + .keyfield_set_map = es0_keyfield_set_map, + .keyfield_set_map_size = es0_keyfield_set_map_size, + .actionfield_set_map = es0_actionfield_set_map, + .actionfield_set_map_size = es0_actionfield_set_map_size, + .keyfield_set_typegroups = es0_keyfield_set_typegroups, + .actionfield_set_typegroups = es0_actionfield_set_typegroups, + }, }; const struct vcap_statistics lan966x_vcap_stats = { .name = "lan966x", - .count = 2, + .count = 3, .keyfield_set_names = vcap_keyfield_set_names, .actionfield_set_names = vcap_actionfield_set_names, .keyfield_names = vcap_keyfield_names, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c index d90c08cfcf14..ac525ff1503e 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c @@ -190,6 +190,26 @@ static void lan966x_vcap_is2_port_keys(struct lan966x_port *port, out->prf(out->dst, "\n"); } +static void lan966x_vcap_es0_port_keys(struct lan966x_port *port, + struct vcap_admin *admin, + struct vcap_output_print *out) +{ + struct lan966x *lan966x = port->lan966x; + u32 val; + + out->prf(out->dst, " port[%d] (%s): ", port->chip_port, + netdev_name(port->dev)); + + val = lan_rd(lan966x, REW_PORT_CFG(port->chip_port)); + out->prf(out->dst, "\n state: "); + if (REW_PORT_CFG_ES0_EN_GET(val)) + out->prf(out->dst, "on"); + else + out->prf(out->dst, "off"); + + out->prf(out->dst, "\n"); +} + int lan966x_vcap_port_info(struct net_device *dev, struct vcap_admin *admin, struct vcap_output_print *out) @@ -210,6 +230,9 @@ int lan966x_vcap_port_info(struct net_device *dev, case VCAP_TYPE_IS1: lan966x_vcap_is1_port_keys(port, admin, out); break; + case VCAP_TYPE_ES0: + lan966x_vcap_es0_port_keys(port, admin, out); + break; default: out->prf(out->dst, " no info\n"); break; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c index 7ea8e8633609..a4414f63c9b1 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c @@ -10,6 +10,12 @@ #define LAN966X_IS1_LOOKUPS 3 #define LAN966X_IS2_LOOKUPS 2 +#define LAN966X_ES0_LOOKUPS 1 + +#define LAN966X_STAT_ESDX_GRN_BYTES 0x300 +#define LAN966X_STAT_ESDX_GRN_PKTS 0x301 +#define LAN966X_STAT_ESDX_YEL_BYTES 0x302 +#define LAN966X_STAT_ESDX_YEL_PKTS 0x303 static struct lan966x_vcap_inst { enum vcap_type vtype; /* type of vcap */ @@ -21,6 +27,14 @@ static struct lan966x_vcap_inst { bool ingress; /* is vcap in the ingress path */ } lan966x_vcap_inst_cfg[] = { { + .vtype = VCAP_TYPE_ES0, + .tgt_inst = 0, + .lookups = LAN966X_ES0_LOOKUPS, + .first_cid = LAN966X_VCAP_CID_ES0_L0, + .last_cid = LAN966X_VCAP_CID_ES0_MAX, + .count = 64, + }, + { .vtype = VCAP_TYPE_IS1, /* IS1-0 */ .tgt_inst = 1, .lookups = LAN966X_IS1_LOOKUPS, @@ -279,6 +293,8 @@ lan966x_vcap_validate_keyset(struct net_device *dev, err = lan966x_vcap_is2_get_port_keysets(dev, lookup, &keysetlist, l3_proto); break; + case VCAP_TYPE_ES0: + return kslist->keysets[0]; default: pr_err("vcap type: %s not supported\n", lan966x_vcaps[admin->vtype].name); @@ -338,6 +354,14 @@ static void lan966x_vcap_is2_add_default_fields(struct lan966x_port *port, VCAP_BIT_0); } +static void lan966x_vcap_es0_add_default_fields(struct lan966x_port *port, + struct vcap_admin *admin, + struct vcap_rule *rule) +{ + vcap_rule_add_key_u32(rule, VCAP_KF_IF_EGR_PORT_NO, + port->chip_port, GENMASK(4, 0)); +} + static void lan966x_vcap_add_default_fields(struct net_device *dev, struct vcap_admin *admin, struct vcap_rule *rule) @@ -351,6 +375,9 @@ static void lan966x_vcap_add_default_fields(struct net_device *dev, case VCAP_TYPE_IS2: lan966x_vcap_is2_add_default_fields(port, admin, rule); break; + case VCAP_TYPE_ES0: + lan966x_vcap_es0_add_default_fields(port, admin, rule); + break; default: pr_err("vcap type: %s not supported\n", lan966x_vcaps[admin->vtype].name); @@ -366,6 +393,40 @@ static void lan966x_vcap_cache_erase(struct vcap_admin *admin) memset(&admin->cache.counter, 0, sizeof(admin->cache.counter)); } +/* The ESDX counter is only used/incremented if the frame has been classified + * with an ISDX > 0 (e.g by a rule in IS0). This is not mentioned in the + * datasheet. + */ +static void lan966x_es0_read_esdx_counter(struct lan966x *lan966x, + struct vcap_admin *admin, u32 id) +{ + u32 counter; + + id = id & 0xff; /* counter limit */ + mutex_lock(&lan966x->stats_lock); + lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG); + counter = lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS)) + + lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS)); + mutex_unlock(&lan966x->stats_lock); + if (counter) + admin->cache.counter = counter; +} + +static void lan966x_es0_write_esdx_counter(struct lan966x *lan966x, + struct vcap_admin *admin, u32 id) +{ + id = id & 0xff; /* counter limit */ + + mutex_lock(&lan966x->stats_lock); + lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG); + lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_BYTES)); + lan_wr(admin->cache.counter, lan966x, + SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS)); + lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_BYTES)); + lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS)); + mutex_unlock(&lan966x->stats_lock); +} + static void lan966x_vcap_cache_write(struct net_device *dev, struct vcap_admin *admin, enum vcap_selection sel, @@ -398,6 +459,9 @@ static void lan966x_vcap_cache_write(struct net_device *dev, admin->cache.sticky = admin->cache.counter > 0; lan_wr(admin->cache.counter, lan966x, VCAP_CNT_DAT(admin->tgt_inst, 0)); + + if (admin->vtype == VCAP_TYPE_ES0) + lan966x_es0_write_esdx_counter(lan966x, admin, start); break; default: break; @@ -437,6 +501,9 @@ static void lan966x_vcap_cache_read(struct net_device *dev, admin->cache.counter = lan_rd(lan966x, VCAP_CNT_DAT(instance, 0)); admin->cache.sticky = admin->cache.counter > 0; + + if (admin->vtype == VCAP_TYPE_ES0) + lan966x_es0_read_esdx_counter(lan966x, admin, start); } } @@ -625,6 +692,12 @@ static void lan966x_vcap_port_key_deselection(struct lan966x *lan966x, lan_wr(0, lan966x, ANA_VCAP_S2_CFG(p)); break; + case VCAP_TYPE_ES0: + for (int p = 0; p < lan966x->num_phys_ports; ++p) + lan_rmw(REW_PORT_CFG_ES0_EN_SET(false), + REW_PORT_CFG_ES0_EN, lan966x, + REW_PORT_CFG(p)); + break; default: pr_err("vcap type: %s not supported\n", lan966x_vcaps[admin->vtype].name); @@ -674,9 +747,18 @@ int lan966x_vcap_init(struct lan966x *lan966x) lan_rmw(ANA_VCAP_CFG_S1_ENA_SET(true), ANA_VCAP_CFG_S1_ENA, lan966x, ANA_VCAP_CFG(lan966x->ports[p]->chip_port)); + + lan_rmw(REW_PORT_CFG_ES0_EN_SET(true), + REW_PORT_CFG_ES0_EN, lan966x, + REW_PORT_CFG(lan966x->ports[p]->chip_port)); } } + /* Statistics: Use ESDX from ES0 if hit, otherwise no counting */ + lan_rmw(REW_STAT_CFG_STAT_MODE_SET(1), + REW_STAT_CFG_STAT_MODE, lan966x, + REW_STAT_CFG); + lan966x->vcap_ctrl = ctrl; return 0; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index a7edf524eedb..dc9af480bfea 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -281,6 +281,7 @@ static int sparx5_create_port(struct sparx5 *sparx5, spx5_port->custom_etype = 0x8880; /* Vitesse */ spx5_port->phylink_pcs.poll = true; spx5_port->phylink_pcs.ops = &sparx5_phylink_pcs_ops; + spx5_port->phylink_pcs.neg_mode = true; spx5_port->is_mrouter = false; INIT_LIST_HEAD(&spx5_port->tc_templates); sparx5->ports[config->portno] = spx5_port; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c index bb97d27a1da4..f8562c1a894d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c @@ -91,8 +91,7 @@ static void sparx5_pcs_get_state(struct phylink_pcs *pcs, state->pause = status.pause; } -static int sparx5_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, +static int sparx5_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -104,8 +103,9 @@ static int sparx5_pcs_config(struct phylink_pcs *pcs, conf = port->conf; conf.power_down = false; conf.portmode = interface; - conf.inband = phylink_autoneg_inband(mode); - conf.autoneg = phylink_test(advertising, Autoneg); + conf.inband = neg_mode == PHYLINK_PCS_NEG_INBAND_DISABLED || + neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED; + conf.autoneg = neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED; conf.pause_adv = 0; if (phylink_test(advertising, Pause)) conf.pause_adv |= ADVERTISE_1000XPAUSE; diff --git a/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h b/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h index a556c4419986..c3569a4c7b69 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h @@ -3,8 +3,8 @@ * Microchip VCAP API */ -/* This file is autogenerated by cml-utils 2023-02-16 11:41:14 +0100. - * Commit ID: be85f176b3a151fa748dcaf97c8824a5c2e065f3 +/* This file is autogenerated by cml-utils 2023-03-13 10:16:42 +0100. + * Commit ID: 259f0efd6d6d91bfbf62858de153cc757b6bffa3 (dirty) */ #ifndef __VCAP_AG_API__ @@ -51,6 +51,7 @@ enum vcap_keyfield_set { VCAP_KFS_RT, /* lan966x is1 X1 */ VCAP_KFS_SMAC_SIP4, /* lan966x is2 X1 */ VCAP_KFS_SMAC_SIP6, /* lan966x is2 X2 */ + VCAP_KFS_VID, /* lan966x es0 X1 */ }; /* List of keyfields with description @@ -79,7 +80,7 @@ enum vcap_keyfield_set { * Second DEI in multiple vlan tags (inner tag) * VCAP_KF_8021Q_DEI2: W1, sparx5: is0 * Third DEI in multiple vlan tags (not always available) - * VCAP_KF_8021Q_DEI_CLS: W1, sparx5: is2/es2, lan966x: is2 + * VCAP_KF_8021Q_DEI_CLS: W1, sparx5: is2/es2, lan966x: is2/es0 * Classified DEI * VCAP_KF_8021Q_PCP0: W3, sparx5: is0, lan966x: is1 * First PCP in multiple vlan tags (outer tag or default port tag) @@ -87,7 +88,7 @@ enum vcap_keyfield_set { * Second PCP in multiple vlan tags (inner tag) * VCAP_KF_8021Q_PCP2: W3, sparx5: is0 * Third PCP in multiple vlan tags (not always available) - * VCAP_KF_8021Q_PCP_CLS: W3, sparx5: is2/es2, lan966x: is2 + * VCAP_KF_8021Q_PCP_CLS: W3, sparx5: is2/es2, lan966x: is2/es0 * Classified PCP * VCAP_KF_8021Q_TPID: W3, sparx5: es0 * TPID for outer tag: 0: Customer TPID 1: Service TPID (88A8 or programmable) @@ -104,7 +105,7 @@ enum vcap_keyfield_set { * VCAP_KF_8021Q_VID2: W12, sparx5: is0 * Third VID in multiple vlan tags (not always available) * VCAP_KF_8021Q_VID_CLS: sparx5 is2 W13, sparx5 es0 W13, sparx5 es2 W13, - * lan966x is2 W12 + * lan966x is2 W12, lan966x es0 W12 * Classified VID * VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS: W1, lan966x: is1 * Set if frame has two or more Q-tags. Independent of port VLAN awareness @@ -146,10 +147,10 @@ enum vcap_keyfield_set { * VCAP_KF_IF_EGR_PORT_MASK_RNG: W3, sparx5: es2 * Select which 32 port group is available in IF_EGR_PORT (or virtual ports or * CPU queue) - * VCAP_KF_IF_EGR_PORT_NO: W7, sparx5: es0 + * VCAP_KF_IF_EGR_PORT_NO: sparx5 es0 W7, lan966x es0 W4 * Egress port number * VCAP_KF_IF_IGR_PORT: sparx5 is0 W7, sparx5 es2 W9, lan966x is1 W3, lan966x - * is2 W4 + * is2 W4, lan966x es0 W4 * Sparx5: Logical ingress port number retrieved from * ANA_CL::PORT_ID_CFG.LPORT_NUM or ERLEG, LAN966x: ingress port nunmber * VCAP_KF_IF_IGR_PORT_MASK: sparx5 is0 W65, sparx5 is2 W32, sparx5 is2 W65, @@ -178,11 +179,12 @@ enum vcap_keyfield_set { * Payload after IPv6 header * VCAP_KF_IP_SNAP_IS: W1, sparx5: is0, lan966x: is1 * Set if frame is IPv4, IPv6, or SNAP frame - * VCAP_KF_ISDX_CLS: W12, sparx5: is2/es0/es2 + * VCAP_KF_ISDX_CLS: sparx5 is2 W12, sparx5 es0 W12, sparx5 es2 W12, lan966x es0 + * W8 * Classified ISDX - * VCAP_KF_ISDX_GT0_IS: W1, sparx5: is2/es0/es2, lan966x: is2 + * VCAP_KF_ISDX_GT0_IS: W1, sparx5: is2/es0/es2, lan966x: is2/es0 * Set if classified ISDX > 0 - * VCAP_KF_L2_BC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2 + * VCAP_KF_L2_BC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2/es0 * Set if frame's destination MAC address is the broadcast address * (FF-FF-FF-FF-FF-FF). * VCAP_KF_L2_DMAC: W48, sparx5: is0/is2/es2, lan966x: is1/is2 @@ -195,7 +197,7 @@ enum vcap_keyfield_set { * LLC header and data after up to two VLAN tags and the type/length field * VCAP_KF_L2_MAC: W48, lan966x: is1 * MAC address (FIRST=1: SMAC, FIRST=0: DMAC) - * VCAP_KF_L2_MC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2 + * VCAP_KF_L2_MC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2/es0 * Set if frame's destination MAC address is a multicast address (bit 40 = 1). * VCAP_KF_L2_PAYLOAD0: W16, lan966x: is2 * Payload bytes 0-1 after the frame's EtherType @@ -213,7 +215,7 @@ enum vcap_keyfield_set { * SNAP header after LLC header (AA-AA-03) * VCAP_KF_L3_DIP_EQ_SIP_IS: W1, sparx5: is2/es2, lan966x: is2 * Set if Src IP matches Dst IP address - * VCAP_KF_L3_DPL_CLS: W1, sparx5: es0/es2 + * VCAP_KF_L3_DPL_CLS: W1, sparx5: es0/es2, lan966x: es0 * The frames drop precedence level * VCAP_KF_L3_DSCP: W6, sparx5: is0, lan966x: is1 * Frame's DSCP value @@ -330,8 +332,12 @@ enum vcap_keyfield_set { * Frame's OAM version * VCAP_KF_OAM_Y1731_IS: W1, sparx5: is2/es2, lan966x: is2 * Set if frame's EtherType = 0x8902 + * VCAP_KF_PDU_TYPE: W4, lan966x: es0 + * PDU type value (none, OAM CCM, MRP, DLR, RTE, IPv4, IPv6, OAM non-CCM) * VCAP_KF_PROT_ACTIVE: W1, sparx5: es0/es2 * Protection is active + * VCAP_KF_RTP_ID: W10, lan966x: es0 + * Classified RTP_ID * VCAP_KF_RT_FRMID: W32, lan966x: is1 * Profinet or OPC-UA FrameId * VCAP_KF_RT_TYPE: W2, lan966x: is1 @@ -470,7 +476,9 @@ enum vcap_key_field { VCAP_KF_OAM_OPCODE, VCAP_KF_OAM_VER, VCAP_KF_OAM_Y1731_IS, + VCAP_KF_PDU_TYPE, VCAP_KF_PROT_ACTIVE, + VCAP_KF_RTP_ID, VCAP_KF_RT_FRMID, VCAP_KF_RT_TYPE, VCAP_KF_RT_VLAN_IDX, @@ -489,6 +497,7 @@ enum vcap_actionfield_set { VCAP_AFS_FULL, /* sparx5 is0 X3 */ VCAP_AFS_S1, /* lan966x is1 X1 */ VCAP_AFS_SMAC_SIP, /* lan966x is2 X1 */ + VCAP_AFS_VID, /* lan966x es0 X1 */ }; /* List of actionfields with description @@ -523,9 +532,9 @@ enum vcap_actionfield_set { * while bits 1:0 control first lookup. Encoding per lookup: 0: Disabled. 1: * Extract 40 bytes after position corresponding to the location of the IPv4 * header and use as key. 2: Extract 40 bytes after SMAC and use as key - * VCAP_AF_DEI_A_VAL: W1, sparx5: es0 + * VCAP_AF_DEI_A_VAL: W1, sparx5: es0, lan966x: es0 * DEI used in ES0 tag A. See TAG_A_DEI_SEL. - * VCAP_AF_DEI_B_VAL: W1, sparx5: es0 + * VCAP_AF_DEI_B_VAL: W1, sparx5: es0, lan966x: es0 * DEI used in ES0 tag B. See TAG_B_DEI_SEL. * VCAP_AF_DEI_C_VAL: W1, sparx5: es0 * DEI used in ES0 tag C. See TAG_C_DEI_SEL. @@ -556,7 +565,7 @@ enum vcap_actionfield_set { * VCAP_AF_ES2_REW_CMD: W3, sparx5: es2 * Command forwarded to REW: 0: No action. 1: SWAP MAC addresses. 2: Do L2CP * DMAC translation when entering or leaving a tunnel. - * VCAP_AF_ESDX: W13, sparx5: es0 + * VCAP_AF_ESDX: sparx5 es0 W13, lan966x es0 W8 * Egress counter index. Used to index egress counter set as defined in * REW::STAT_CFG. * VCAP_AF_FWD_KILL_ENA: W1, lan966x: is2 @@ -652,9 +661,9 @@ enum vcap_actionfield_set { * (input) AND ~PAG_OVERRIDE_MASK) OR (PAG_VAL AND PAG_OVERRIDE_MASK) * VCAP_AF_PAG_VAL: W8, sparx5: is0, lan966x: is1 * See PAG_OVERRIDE_MASK. - * VCAP_AF_PCP_A_VAL: W3, sparx5: es0 + * VCAP_AF_PCP_A_VAL: W3, sparx5: es0, lan966x: es0 * PCP used in ES0 tag A. See TAG_A_PCP_SEL. - * VCAP_AF_PCP_B_VAL: W3, sparx5: es0 + * VCAP_AF_PCP_B_VAL: W3, sparx5: es0, lan966x: es0 * PCP used in ES0 tag B. See TAG_B_PCP_SEL. * VCAP_AF_PCP_C_VAL: W3, sparx5: es0 * PCP used in ES0 tag C. See TAG_C_PCP_SEL. @@ -691,10 +700,10 @@ enum vcap_actionfield_set { * Selects tag C mode: 0: Do not push tag C. 1: Push tag C if * IFH.VSTAX.TAG.WAS_TAGGED = 1. 2: Push tag C if IFH.VSTAX.TAG.WAS_TAGGED = 0. * 3: Push tag C if UNTAG_VID_ENA = 0 or (C-TAG.VID ! = VID_C_VAL). - * VCAP_AF_PUSH_INNER_TAG: W1, sparx5: es0 + * VCAP_AF_PUSH_INNER_TAG: W1, sparx5: es0, lan966x: es0 * Controls inner tagging. 0: Do not push ES0 tag B as inner tag. 1: Push ES0 * tag B as inner tag. - * VCAP_AF_PUSH_OUTER_TAG: W2, sparx5: es0 + * VCAP_AF_PUSH_OUTER_TAG: W2, sparx5: es0, lan966x: es0 * Controls outer tagging. 0: No ES0 tag A: Port tag is allowed if enabled on * port. 1: ES0 tag A: Push ES0 tag A. No port tag. 2: Force port tag: Always * push port tag. No ES0 tag A. 3: Force untag: Never push port tag or ES0 tag @@ -720,29 +729,29 @@ enum vcap_actionfield_set { * VCAP_AF_SWAP_MACS_ENA: W1, sparx5: es0 * This setting is only active when FWD_SEL = 1 or FWD_SEL = 2 and PIPELINE_ACT * = LBK_ASM. 0: No action. 1: Swap MACs and clear bit 40 in new SMAC. - * VCAP_AF_TAG_A_DEI_SEL: W3, sparx5: es0 + * VCAP_AF_TAG_A_DEI_SEL: sparx5 es0 W3, lan966x es0 W2 * Selects PCP for ES0 tag A. 0: Classified DEI. 1: DEI_A_VAL. 2: DP and QoS * mapped to PCP (per port table). 3: DP. - * VCAP_AF_TAG_A_PCP_SEL: W3, sparx5: es0 + * VCAP_AF_TAG_A_PCP_SEL: sparx5 es0 W3, lan966x es0 W2 * Selects PCP for ES0 tag A. 0: Classified PCP. 1: PCP_A_VAL. 2: DP and QoS * mapped to PCP (per port table). 3: QoS class. - * VCAP_AF_TAG_A_TPID_SEL: W3, sparx5: es0 + * VCAP_AF_TAG_A_TPID_SEL: sparx5 es0 W3, lan966x es0 W2 * Selects TPID for ES0 tag A: 0: 0x8100. 1: 0x88A8. 2: Custom * (REW:PORT:PORT_VLAN_CFG.PORT_TPID). 3: If IFH.TAG_TYPE = 0 then 0x8100 else * custom. - * VCAP_AF_TAG_A_VID_SEL: W2, sparx5: es0 + * VCAP_AF_TAG_A_VID_SEL: sparx5 es0 W2, lan966x es0 W1 * Selects VID for ES0 tag A. 0: Classified VID + VID_A_VAL. 1: VID_A_VAL. - * VCAP_AF_TAG_B_DEI_SEL: W3, sparx5: es0 + * VCAP_AF_TAG_B_DEI_SEL: sparx5 es0 W3, lan966x es0 W2 * Selects PCP for ES0 tag B. 0: Classified DEI. 1: DEI_B_VAL. 2: DP and QoS * mapped to PCP (per port table). 3: DP. - * VCAP_AF_TAG_B_PCP_SEL: W3, sparx5: es0 + * VCAP_AF_TAG_B_PCP_SEL: sparx5 es0 W3, lan966x es0 W2 * Selects PCP for ES0 tag B. 0: Classified PCP. 1: PCP_B_VAL. 2: DP and QoS * mapped to PCP (per port table). 3: QoS class. - * VCAP_AF_TAG_B_TPID_SEL: W3, sparx5: es0 + * VCAP_AF_TAG_B_TPID_SEL: sparx5 es0 W3, lan966x es0 W2 * Selects TPID for ES0 tag B. 0: 0x8100. 1: 0x88A8. 2: Custom * (REW:PORT:PORT_VLAN_CFG.PORT_TPID). 3: If IFH.TAG_TYPE = 0 then 0x8100 else * custom. - * VCAP_AF_TAG_B_VID_SEL: W2, sparx5: es0 + * VCAP_AF_TAG_B_VID_SEL: sparx5 es0 W2, lan966x es0 W1 * Selects VID for ES0 tag B. 0: Classified VID + VID_B_VAL. 1: VID_B_VAL. * VCAP_AF_TAG_C_DEI_SEL: W3, sparx5: es0 * Selects DEI source for ES0 tag C. 0: Classified DEI. 1: DEI_C_VAL. 2: @@ -770,9 +779,9 @@ enum vcap_actionfield_set { * VCAP_AF_UNTAG_VID_ENA: W1, sparx5: es0 * Controls insertion of tag C. Untag or insert mode can be selected. See * PUSH_CUSTOMER_TAG. - * VCAP_AF_VID_A_VAL: W12, sparx5: es0 + * VCAP_AF_VID_A_VAL: W12, sparx5: es0, lan966x: es0 * VID used in ES0 tag A. See TAG_A_VID_SEL. - * VCAP_AF_VID_B_VAL: W12, sparx5: es0 + * VCAP_AF_VID_B_VAL: W12, sparx5: es0, lan966x: es0 * VID used in ES0 tag B. See TAG_B_VID_SEL. * VCAP_AF_VID_C_VAL: W12, sparx5: es0 * VID used in ES0 tag C. See TAG_C_VID_SEL. diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.c b/drivers/net/ethernet/microchip/vcap/vcap_api.c index 5675b0962bc3..15a3a31b15d4 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api.c @@ -1021,18 +1021,32 @@ static struct vcap_rule_internal *vcap_dup_rule(struct vcap_rule_internal *ri, list_for_each_entry(ckf, &ri->data.keyfields, ctrl.list) { newckf = kmemdup(ckf, sizeof(*newckf), GFP_KERNEL); if (!newckf) - return ERR_PTR(-ENOMEM); + goto err; list_add_tail(&newckf->ctrl.list, &duprule->data.keyfields); } list_for_each_entry(caf, &ri->data.actionfields, ctrl.list) { newcaf = kmemdup(caf, sizeof(*newcaf), GFP_KERNEL); if (!newcaf) - return ERR_PTR(-ENOMEM); + goto err; list_add_tail(&newcaf->ctrl.list, &duprule->data.actionfields); } return duprule; + +err: + list_for_each_entry_safe(ckf, newckf, &duprule->data.keyfields, ctrl.list) { + list_del(&ckf->ctrl.list); + kfree(ckf); + } + + list_for_each_entry_safe(caf, newcaf, &duprule->data.actionfields, ctrl.list) { + list_del(&caf->ctrl.list); + kfree(caf); + } + + kfree(duprule); + return ERR_PTR(-ENOMEM); } static void vcap_apply_width(u8 *dst, int width, int bytes) @@ -1121,7 +1135,7 @@ static void vcap_copy_to_client_actionfield(struct vcap_rule_internal *ri, vcap_copy_from_w32be(field->data.u128.value, value, field_size, width); break; - }; + } } else { switch (field->ctrl.type) { case VCAP_FIELD_BIT: @@ -1162,7 +1176,7 @@ static void vcap_copy_to_client_actionfield(struct vcap_rule_internal *ri, value, width, field_size); break; - }; + } } } @@ -1236,7 +1250,7 @@ static void vcap_copy_to_client_keyfield(struct vcap_rule_internal *ri, vcap_copy_from_w32be(field->data.u128.mask, mask, field_size, width); break; - }; + } } else { switch (field->ctrl.type) { case VCAP_FIELD_BIT: @@ -1284,7 +1298,7 @@ static void vcap_copy_to_client_keyfield(struct vcap_rule_internal *ri, value, mask, width, field_size); break; - }; + } } } diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index 9d1507eba5b9..2bd1d74021f7 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -627,7 +627,7 @@ static int mana_hwc_establish_channel(struct gdma_context *gc, u16 *q_depth, if (WARN_ON(cq->id >= gc->max_num_cqs)) return -EPROTO; - gc->cq_table = vzalloc(gc->max_num_cqs * sizeof(struct gdma_queue *)); + gc->cq_table = vcalloc(gc->max_num_cqs, sizeof(struct gdma_queue *)); if (!gc->cq_table) return -ENOMEM; diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index d907727c7b7a..c2ad0921e893 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -8,6 +8,7 @@ #include <linux/ethtool.h> #include <linux/filter.h> #include <linux/mm.h> +#include <linux/pci.h> #include <net/checksum.h> #include <net/ip6_checksum.h> @@ -179,6 +180,14 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset; } + if (skb_vlan_tag_present(skb)) { + pkt_fmt = MANA_LONG_PKT_FMT; + pkg.tx_oob.l_oob.inject_vlan_pri_tag = 1; + pkg.tx_oob.l_oob.pcp = skb_vlan_tag_get_prio(skb); + pkg.tx_oob.l_oob.dei = skb_vlan_tag_get_cfi(skb); + pkg.tx_oob.l_oob.vlan_id = skb_vlan_tag_get_id(skb); + } + pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt; if (pkt_fmt == MANA_SHORT_PKT_FMT) { @@ -972,7 +981,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, bool update_tab) { u16 num_entries = MANA_INDIRECT_TABLE_SIZE; - struct mana_cfg_rx_steer_req *req = NULL; + struct mana_cfg_rx_steer_req_v2 *req; struct mana_cfg_rx_steer_resp resp = {}; struct net_device *ndev = apc->ndev; mana_handle_t *req_indir_tab; @@ -987,6 +996,8 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size, sizeof(resp)); + req->hdr.req.msg_version = GDMA_MESSAGE_V2; + req->vport = apc->port_handle; req->num_indir_entries = num_entries; req->indir_tab_offset = sizeof(*req); @@ -996,6 +1007,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, req->update_hashkey = update_key; req->update_indir_tab = update_tab; req->default_rxobj = apc->default_rxobj; + req->cqe_coalescing_enable = 0; if (update_key) memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); @@ -1457,6 +1469,12 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3); } + if (cqe->rx_vlantag_present) { + u16 vlan_tci = cqe->rx_vlan_id; + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); + } + u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += pkt_len; @@ -2328,9 +2346,12 @@ int mana_attach(struct net_device *ndev) static int mana_dealloc_queues(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); + unsigned long timeout = jiffies + 120 * HZ; struct gdma_dev *gd = apc->ac->gdma_dev; struct mana_txq *txq; + struct sk_buff *skb; int i, err; + u32 tsleep; if (apc->port_is_up) return -EINVAL; @@ -2346,15 +2367,40 @@ static int mana_dealloc_queues(struct net_device *ndev) * to false, but it doesn't matter since mana_start_xmit() drops any * new packets due to apc->port_is_up being false. * - * Drain all the in-flight TX packets + * Drain all the in-flight TX packets. + * A timeout of 120 seconds for all the queues is used. + * This will break the while loop when h/w is not responding. + * This value of 120 has been decided here considering max + * number of queues. */ + for (i = 0; i < apc->num_queues; i++) { txq = &apc->tx_qp[i].txq; - - while (atomic_read(&txq->pending_sends) > 0) - usleep_range(1000, 2000); + tsleep = 1000; + while (atomic_read(&txq->pending_sends) > 0 && + time_before(jiffies, timeout)) { + usleep_range(tsleep, tsleep + 1000); + tsleep <<= 1; + } + if (atomic_read(&txq->pending_sends)) { + err = pcie_flr(to_pci_dev(gd->gdma_context->dev)); + if (err) { + netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n", + err, atomic_read(&txq->pending_sends), + txq->gdma_txq_id); + } + break; + } } + for (i = 0; i < apc->num_queues; i++) { + txq = &apc->tx_qp[i].txq; + while ((skb = skb_dequeue(&txq->pending_skbs))) { + mana_unmap_skb(skb, apc); + dev_kfree_skb_any(skb); + } + atomic_set(&txq->pending_sends, 0); + } /* We're 100% sure the queues can no longer be woken up, because * we're sure now mana_poll_tx_cq() can't be running. */ @@ -2451,8 +2497,9 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, ndev->hw_features |= NETIF_F_RXCSUM; ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; ndev->hw_features |= NETIF_F_RXHASH; - ndev->features = ndev->hw_features; - ndev->vlan_features = 0; + ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; + ndev->vlan_features = ndev->features; ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_NDO_XMIT; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 1f5f00b30441..56ccbd4c37fe 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2925,10 +2925,8 @@ int ocelot_init(struct ocelot *ocelot) } } - mutex_init(&ocelot->ptp_lock); mutex_init(&ocelot->mact_lock); mutex_init(&ocelot->fwd_domain_lock); - mutex_init(&ocelot->tas_lock); spin_lock_init(&ocelot->ptp_clock_lock); spin_lock_init(&ocelot->ts_id_lock); diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c index 8e3894cf5f7c..83a3ce0c568e 100644 --- a/drivers/net/ethernet/mscc/ocelot_fdma.c +++ b/drivers/net/ethernet/mscc/ocelot_fdma.c @@ -368,7 +368,8 @@ static bool ocelot_fdma_receive_skb(struct ocelot *ocelot, struct sk_buff *skb) if (unlikely(!ndev)) return false; - pskb_trim(skb, skb->len - ETH_FCS_LEN); + if (pskb_trim(skb, skb->len - ETH_FCS_LEN)) + return false; skb->dev = ndev; skb->protocol = eth_type_trans(skb, skb->dev); diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index ee052404eb55..e0916afcddfb 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -592,6 +592,16 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, return -EOPNOTSUPP; } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { + struct flow_match_meta match; + + flow_rule_match_meta(rule, &match); + if (match.mask->l2_miss) { + NL_SET_ERR_MSG_MOD(extack, "Can't match on \"l2_miss\""); + return -EOPNOTSUPP; + } + } + /* For VCAP ES0 (egress rewriter) we can match on the ingress port */ if (!ingress) { ret = ocelot_flower_parse_indev(ocelot, port, f, filter); diff --git a/drivers/net/ethernet/mscc/ocelot_mm.c b/drivers/net/ethernet/mscc/ocelot_mm.c index fb3145118d68..c815ae64e39d 100644 --- a/drivers/net/ethernet/mscc/ocelot_mm.c +++ b/drivers/net/ethernet/mscc/ocelot_mm.c @@ -67,10 +67,13 @@ void ocelot_port_update_active_preemptible_tcs(struct ocelot *ocelot, int port) val = mm->preemptible_tcs; /* Cut through switching doesn't work for preemptible priorities, - * so first make sure it is disabled. + * so first make sure it is disabled. Also, changing the preemptible + * TCs affects the oversized frame dropping logic, so that needs to be + * re-triggered. And since tas_guard_bands_update() also implicitly + * calls cut_through_fwd(), we don't need to explicitly call it. */ mm->active_preemptible_tcs = val; - ocelot->ops->cut_through_fwd(ocelot); + ocelot->ops->tas_guard_bands_update(ocelot, port); dev_dbg(ocelot->dev, "port %d %s/%s, MM TX %s, preemptible TCs 0x%x, active 0x%x\n", @@ -89,17 +92,14 @@ void ocelot_port_change_fp(struct ocelot *ocelot, int port, { struct ocelot_mm_state *mm = &ocelot->mm[port]; - mutex_lock(&ocelot->fwd_domain_lock); + lockdep_assert_held(&ocelot->fwd_domain_lock); if (mm->preemptible_tcs == preemptible_tcs) - goto out_unlock; + return; mm->preemptible_tcs = preemptible_tcs; ocelot_port_update_active_preemptible_tcs(ocelot, port); - -out_unlock: - mutex_unlock(&ocelot->fwd_domain_lock); } static void ocelot_mm_update_port_status(struct ocelot *ocelot, int port) diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index 2180ae94c744..cb32234a5bf1 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -439,8 +439,12 @@ static int ocelot_ipv6_ptp_trap_del(struct ocelot *ocelot, int port) static int ocelot_setup_ptp_traps(struct ocelot *ocelot, int port, bool l2, bool l4) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; int err; + ocelot_port->trap_proto &= ~(OCELOT_PROTO_PTP_L2 | + OCELOT_PROTO_PTP_L4); + if (l2) err = ocelot_l2_ptp_trap_add(ocelot, port); else @@ -464,6 +468,11 @@ static int ocelot_setup_ptp_traps(struct ocelot *ocelot, int port, if (err) return err; + if (l2) + ocelot_port->trap_proto |= OCELOT_PROTO_PTP_L2; + if (l4) + ocelot_port->trap_proto |= OCELOT_PROTO_PTP_L4; + return 0; err_ipv6: @@ -474,10 +483,38 @@ err_ipv4: return err; } +static int ocelot_traps_to_ptp_rx_filter(unsigned int proto) +{ + if ((proto & OCELOT_PROTO_PTP_L2) && (proto & OCELOT_PROTO_PTP_L4)) + return HWTSTAMP_FILTER_PTP_V2_EVENT; + else if (proto & OCELOT_PROTO_PTP_L2) + return HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + else if (proto & OCELOT_PROTO_PTP_L4) + return HWTSTAMP_FILTER_PTP_V2_L4_EVENT; + + return HWTSTAMP_FILTER_NONE; +} + int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr) { - return copy_to_user(ifr->ifr_data, &ocelot->hwtstamp_config, - sizeof(ocelot->hwtstamp_config)) ? -EFAULT : 0; + struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct hwtstamp_config cfg = {}; + + switch (ocelot_port->ptp_cmd) { + case IFH_REW_OP_TWO_STEP_PTP: + cfg.tx_type = HWTSTAMP_TX_ON; + break; + case IFH_REW_OP_ORIGIN_PTP: + cfg.tx_type = HWTSTAMP_TX_ONESTEP_SYNC; + break; + default: + cfg.tx_type = HWTSTAMP_TX_OFF; + break; + } + + cfg.rx_filter = ocelot_traps_to_ptp_rx_filter(ocelot_port->trap_proto); + + return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; } EXPORT_SYMBOL(ocelot_hwstamp_get); @@ -509,8 +546,6 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) return -ERANGE; } - mutex_lock(&ocelot->ptp_lock); - switch (cfg.rx_filter) { case HWTSTAMP_FILTER_NONE: break; @@ -531,28 +566,14 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) l4 = true; break; default: - mutex_unlock(&ocelot->ptp_lock); return -ERANGE; } err = ocelot_setup_ptp_traps(ocelot, port, l2, l4); - if (err) { - mutex_unlock(&ocelot->ptp_lock); + if (err) return err; - } - if (l2 && l4) - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - else if (l2) - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; - else if (l4) - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; - else - cfg.rx_filter = HWTSTAMP_FILTER_NONE; - - /* Commit back the result & save it */ - memcpy(&ocelot->hwtstamp_config, &cfg, sizeof(cfg)); - mutex_unlock(&ocelot->ptp_lock); + cfg.rx_filter = ocelot_traps_to_ptp_rx_filter(ocelot_port->trap_proto); return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; } @@ -824,11 +845,6 @@ int ocelot_init_timestamp(struct ocelot *ocelot, ocelot_write(ocelot, PTP_CFG_MISC_PTP_EN, PTP_CFG_MISC); - /* There is no device reconfiguration, PTP Rx stamping is always - * enabled. - */ - ocelot->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - return 0; } EXPORT_SYMBOL(ocelot_init_timestamp); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index c5687d94ea88..7b7e1c5b00f4 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -66,6 +66,7 @@ #include <linux/slab.h> #include <linux/prefetch.h> #include <net/checksum.h> +#include <net/gso.h> #include <net/ip.h> #include <net/tcp.h> #include <asm/byteorder.h> diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index bf6bae557158..8c6954c58a88 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -311,8 +311,6 @@ nfp_devlink_flash_update(struct devlink *devlink, } const struct devlink_ops nfp_devlink_ops = { - .port_split = nfp_devlink_port_split, - .port_unsplit = nfp_devlink_port_unsplit, .sb_pool_get = nfp_devlink_sb_pool_get, .sb_pool_set = nfp_devlink_sb_pool_set, .eswitch_mode_get = nfp_devlink_eswitch_mode_get, @@ -321,6 +319,11 @@ const struct devlink_ops nfp_devlink_ops = { .flash_update = nfp_devlink_flash_update, }; +static const struct devlink_port_ops nfp_devlink_port_ops = { + .port_split = nfp_devlink_port_split, + .port_unsplit = nfp_devlink_port_unsplit, +}; + int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) { struct devlink_port_attrs attrs = {}; @@ -351,7 +354,8 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) devlink = priv_to_devlink(app->pf); - return devl_port_register(devlink, &port->dl_port, port->eth_id); + return devl_port_register_with_ops(devlink, &port->dl_port, + port->eth_id, &nfp_devlink_port_ops); } void nfp_devlink_port_unregister(struct nfp_port *port) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 62f0bf91d1e1..6b1fb5708434 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -53,6 +53,8 @@ #include "crypto/crypto.h" #include "crypto/fw.h" +static int nfp_net_mc_unsync(struct net_device *netdev, const unsigned char *addr); + /** * nfp_net_get_fw_version() - Read and parse the FW version * @fw_ver: Output fw_version structure to read to @@ -598,7 +600,7 @@ nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, if (likely(!dp->ktls_tx)) return skb; - if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + if (!tls_is_skb_tx_device_offloaded(skb)) return skb; datalen = skb->len - skb_tcp_all_headers(skb); @@ -666,7 +668,7 @@ void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle) if (!tls_handle) return; - if (WARN_ON_ONCE(!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))) + if (WARN_ON_ONCE(!tls_is_skb_tx_device_offloaded(skb))) return; datalen = skb->len - skb_tcp_all_headers(skb); @@ -1084,6 +1086,9 @@ static int nfp_net_netdev_close(struct net_device *netdev) /* Step 2: Tell NFP */ + if (nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER) + __dev_mc_unsync(netdev, nfp_net_mc_unsync); + nfp_net_clear_config_and_disable(nn); nfp_port_configure(netdev, false); @@ -2418,6 +2423,8 @@ static void nfp_net_rss_init(struct nfp_net *nn) /* Enable IPv4/IPv6 TCP by default */ nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP | NFP_NET_CFG_RSS_IPV6_TCP | + NFP_NET_CFG_RSS_IPV4_UDP | + NFP_NET_CFG_RSS_IPV6_UDP | FIELD_PREP(NFP_NET_CFG_RSS_HFUNC, nn->rss_hfunc) | NFP_NET_CFG_RSS_MASK; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index dfedb52b7e70..e75cbb287625 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -436,49 +436,41 @@ static void nfp_add_media_link_mode(struct nfp_port *port, struct nfp_eth_table_port *eth_port, struct ethtool_link_ksettings *cmd) { - u64 supported_modes[2], advertised_modes[2]; - struct nfp_eth_media_buf ethm = { - .eth_index = eth_port->eth_index, - }; - struct nfp_cpp *cpp = port->app->cpp; - - if (nfp_eth_read_media(cpp, ðm)) { - bitmap_fill(port->speed_bitmap, NFP_SUP_SPEED_NUMBER); - return; - } - bitmap_zero(port->speed_bitmap, NFP_SUP_SPEED_NUMBER); - for (u32 i = 0; i < 2; i++) { - supported_modes[i] = le64_to_cpu(ethm.supported_modes[i]); - advertised_modes[i] = le64_to_cpu(ethm.advertised_modes[i]); - } - for (u32 i = 0; i < NFP_MEDIA_LINK_MODES_NUMBER; i++) { if (i < 64) { - if (supported_modes[0] & BIT_ULL(i)) { + if (eth_port->link_modes_supp[0] & BIT_ULL(i)) { __set_bit(nfp_eth_media_table[i].ethtool_link_mode, cmd->link_modes.supported); __set_bit(nfp_eth_media_table[i].speed, port->speed_bitmap); } - if (advertised_modes[0] & BIT_ULL(i)) + if (eth_port->link_modes_ad[0] & BIT_ULL(i)) __set_bit(nfp_eth_media_table[i].ethtool_link_mode, cmd->link_modes.advertising); } else { - if (supported_modes[1] & BIT_ULL(i - 64)) { + if (eth_port->link_modes_supp[1] & BIT_ULL(i - 64)) { __set_bit(nfp_eth_media_table[i].ethtool_link_mode, cmd->link_modes.supported); __set_bit(nfp_eth_media_table[i].speed, port->speed_bitmap); } - if (advertised_modes[1] & BIT_ULL(i - 64)) + if (eth_port->link_modes_ad[1] & BIT_ULL(i - 64)) __set_bit(nfp_eth_media_table[i].ethtool_link_mode, cmd->link_modes.advertising); } } + + /* We take all speeds as supported when it fails to read + * link modes due to old management firmware that doesn't + * support link modes reading or error occurring, so that + * speed change of this port is allowed. + */ + if (bitmap_empty(port->speed_bitmap, NFP_SUP_SPEED_NUMBER)) + bitmap_fill(port->speed_bitmap, NFP_SUP_SPEED_NUMBER); } /** diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h index 781edc451bd4..6e044ac04917 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h @@ -196,6 +196,9 @@ enum nfp_ethtool_link_mode_list { * subports) * @ports.is_split: is interface part of a split port * @ports.fec_modes_supported: bitmap of FEC modes supported + * + * @ports.link_modes_supp: bitmap of link modes supported + * @ports.link_modes_ad: bitmap of link modes advertised */ struct nfp_eth_table { unsigned int count; @@ -235,6 +238,9 @@ struct nfp_eth_table { bool is_split; unsigned int fec_modes_supported; + + u64 link_modes_supp[2]; + u64 link_modes_ad[2]; } ports[]; }; @@ -313,7 +319,6 @@ struct nfp_eth_media_buf { }; int nfp_nsp_read_media(struct nfp_nsp *state, void *buf, unsigned int size); -int nfp_eth_read_media(struct nfp_cpp *cpp, struct nfp_eth_media_buf *ethm); #define NFP_NSP_VERSION_BUFSZ 1024 /* reasonable size, not in the ABI */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c index 570ac1bb2122..9d62085d772a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c @@ -227,6 +227,30 @@ nfp_eth_calc_port_type(struct nfp_cpp *cpp, struct nfp_eth_table_port *entry) entry->port_type = PORT_DA; } +static void +nfp_eth_read_media(struct nfp_cpp *cpp, struct nfp_nsp *nsp, struct nfp_eth_table_port *entry) +{ + struct nfp_eth_media_buf ethm = { + .eth_index = entry->eth_index, + }; + unsigned int i; + int ret; + + if (!nfp_nsp_has_read_media(nsp)) + return; + + ret = nfp_nsp_read_media(nsp, ðm, sizeof(ethm)); + if (ret) { + nfp_err(cpp, "Reading media link modes failed: %d\n", ret); + return; + } + + for (i = 0; i < 2; i++) { + entry->link_modes_supp[i] = le64_to_cpu(ethm.supported_modes[i]); + entry->link_modes_ad[i] = le64_to_cpu(ethm.advertised_modes[i]); + } +} + /** * nfp_eth_read_ports() - retrieve port information * @cpp: NFP CPP handle @@ -293,8 +317,10 @@ __nfp_eth_read_ports(struct nfp_cpp *cpp, struct nfp_nsp *nsp) &table->ports[j++]); nfp_eth_calc_port_geometry(cpp, table); - for (i = 0; i < table->count; i++) + for (i = 0; i < table->count; i++) { nfp_eth_calc_port_type(cpp, &table->ports[i]); + nfp_eth_read_media(cpp, nsp, &table->ports[i]); + } kfree(entries); @@ -647,29 +673,3 @@ int __nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes) return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES, lanes, NSP_ETH_CTRL_SET_LANES); } - -int nfp_eth_read_media(struct nfp_cpp *cpp, struct nfp_eth_media_buf *ethm) -{ - struct nfp_nsp *nsp; - int ret; - - nsp = nfp_nsp_open(cpp); - if (IS_ERR(nsp)) { - nfp_err(cpp, "Failed to access the NSP: %pe\n", nsp); - return PTR_ERR(nsp); - } - - if (!nfp_nsp_has_read_media(nsp)) { - nfp_warn(cpp, "Reading media link modes not supported. Please update flash\n"); - ret = -EOPNOTSUPP; - goto exit_close_nsp; - } - - ret = nfp_nsp_read_media(nsp, ethm, sizeof(*ethm)); - if (ret) - nfp_err(cpp, "Reading media link modes failed: %pe\n", ERR_PTR(ret)); - -exit_close_nsp: - nfp_nsp_close(nsp); - return ret; -} diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig index 4e18b64dceb9..9651cc714ef2 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig +++ b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig @@ -5,7 +5,7 @@ config PCH_GBE tristate "OKI SEMICONDUCTOR IOH(ML7223/ML7831) GbE" - depends on PCI && (X86_32 || COMPILE_TEST) + depends on PCI && (MIPS_GENERIC || X86_32 || COMPILE_TEST) depends on PTP_1588_CLOCK select MII select PTP_1588_CLOCK_PCH diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index b8678da1cce5..ab7d217b98b3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -353,12 +353,6 @@ err_out_reset: ionic_reset(ionic); err_out_teardown: ionic_dev_teardown(ionic); - pci_clear_master(pdev); - /* Don't fail the probe for these errors, keep - * the hw interface around for inspection - */ - return 0; - err_out_unmap_bars: ionic_unmap_bars(ionic); err_out_pci_release_regions: diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 9b2b96fa36af..3a6b0a9bc241 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -104,6 +104,15 @@ static void ionic_get_regs(struct net_device *netdev, struct ethtool_regs *regs, memcpy_fromio(p + offset, lif->ionic->idev.dev_cmd_regs->words, size); } +static void ionic_get_link_ext_stats(struct net_device *netdev, + struct ethtool_link_ext_stats *stats) +{ + struct ionic_lif *lif = netdev_priv(netdev); + + if (lif->ionic->pdev->is_physfn) + stats->link_down_events = lif->link_down_count; +} + static int ionic_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *ks) { @@ -1074,6 +1083,7 @@ static const struct ethtool_ops ionic_ethtool_ops = { .get_regs_len = ionic_get_regs_len, .get_regs = ionic_get_regs, .get_link = ethtool_op_get_link, + .get_link_ext_stats = ionic_get_link_ext_stats, .get_link_ksettings = ionic_get_link_ksettings, .set_link_ksettings = ionic_set_link_ksettings, .get_coalesce = ionic_get_coalesce, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 957027e546b3..432fb93aa801 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -168,6 +168,7 @@ static void ionic_link_status_check(struct ionic_lif *lif) } } else { if (netif_carrier_ok(netdev)) { + lif->link_down_count++; netdev_info(netdev, "Link down\n"); netif_carrier_off(netdev); } @@ -474,11 +475,6 @@ static void ionic_qcqs_free(struct ionic_lif *lif) static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq, struct ionic_qcq *n_qcq) { - if (WARN_ON(n_qcq->flags & IONIC_QCQ_F_INTR)) { - ionic_intr_free(n_qcq->cq.lif->ionic, n_qcq->intr.index); - n_qcq->flags &= ~IONIC_QCQ_F_INTR; - } - n_qcq->intr.vector = src_qcq->intr.vector; n_qcq->intr.index = src_qcq->intr.index; n_qcq->napi_qcq = src_qcq->napi_qcq; @@ -560,7 +556,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, new->q.dev = dev; new->flags = flags; - new->q.info = vzalloc(num_descs * sizeof(*new->q.info)); + new->q.info = vcalloc(num_descs, sizeof(*new->q.info)); if (!new->q.info) { netdev_err(lif->netdev, "Cannot allocate queue info\n"); err = -ENOMEM; @@ -581,7 +577,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, if (err) goto err_out; - new->cq.info = vzalloc(num_descs * sizeof(*new->cq.info)); + new->cq.info = vcalloc(num_descs, sizeof(*new->cq.info)); if (!new->cq.info) { netdev_err(lif->netdev, "Cannot allocate completion queue info\n"); err = -ENOMEM; @@ -1821,6 +1817,7 @@ static int ionic_change_mtu(struct net_device *netdev, int new_mtu) static void ionic_tx_timeout_work(struct work_struct *ws) { struct ionic_lif *lif = container_of(ws, struct ionic_lif, tx_timeout_work); + int err; if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) return; @@ -1833,8 +1830,11 @@ static void ionic_tx_timeout_work(struct work_struct *ws) mutex_lock(&lif->queue_lock); ionic_stop_queues_reconfig(lif); - ionic_start_queues_reconfig(lif); + err = ionic_start_queues_reconfig(lif); mutex_unlock(&lif->queue_lock); + + if (err) + dev_err(lif->ionic->dev, "%s: Restarting queues failed\n", __func__); } static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue) @@ -2804,17 +2804,22 @@ static int ionic_cmb_reconfig(struct ionic_lif *lif, if (err) { dev_err(lif->ionic->dev, "CMB restore failed: %d\n", err); - goto errout; + goto err_out; } } - ionic_start_queues_reconfig(lif); - } else { - /* This was detached in ionic_stop_queues_reconfig() */ - netif_device_attach(lif->netdev); + err = ionic_start_queues_reconfig(lif); + if (err) { + dev_err(lif->ionic->dev, + "CMB reconfig failed: %d\n", err); + goto err_out; + } } -errout: +err_out: + /* This was detached in ionic_stop_queues_reconfig() */ + netif_device_attach(lif->netdev); + return err; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index c9c4c46d5a16..fd2ea670e7d8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -201,6 +201,7 @@ struct ionic_lif { u64 hw_features; bool registered; u16 lif_type; + unsigned int link_down_count; unsigned int nmcast; unsigned int nucast; unsigned int nvlans; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index f8682356d0cf..94d4f9413ab7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -194,6 +194,22 @@ void qed_hw_remove(struct qed_dev *cdev); struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn); /** + * qed_ptt_acquire_context(): Allocate a PTT window honoring the context + * atomicy. + * + * @p_hwfn: HW device data. + * @is_atomic: Hint from the caller - if the func can sleep or not. + * + * Context: The function should not sleep in case is_atomic == true. + * Return: struct qed_ptt. + * + * Should be called at the entry point to the driver + * (at the beginning of an exported function). + */ +struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn, + bool is_atomic); + +/** * qed_ptt_release(): Release PTT Window. * * @p_hwfn: HW device data. diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c index 3764190b948e..04602ac94708 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c +++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c @@ -693,13 +693,14 @@ static void _qed_fcoe_get_pstats(struct qed_hwfn *p_hwfn, } static int qed_fcoe_get_stats(struct qed_hwfn *p_hwfn, - struct qed_fcoe_stats *p_stats) + struct qed_fcoe_stats *p_stats, + bool is_atomic) { struct qed_ptt *p_ptt; memset(p_stats, 0, sizeof(*p_stats)); - p_ptt = qed_ptt_acquire(p_hwfn); + p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic); if (!p_ptt) { DP_ERR(p_hwfn, "Failed to acquire ptt\n"); @@ -973,19 +974,27 @@ static int qed_fcoe_destroy_conn(struct qed_dev *cdev, QED_SPQ_MODE_EBLOCK, NULL); } +static int qed_fcoe_stats_context(struct qed_dev *cdev, + struct qed_fcoe_stats *stats, + bool is_atomic) +{ + return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic); +} + static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats) { - return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats); + return qed_fcoe_stats_context(cdev, stats, false); } void qed_get_protocol_stats_fcoe(struct qed_dev *cdev, - struct qed_mcp_fcoe_stats *stats) + struct qed_mcp_fcoe_stats *stats, + bool is_atomic) { struct qed_fcoe_stats proto_stats; /* Retrieve FW statistics */ memset(&proto_stats, 0, sizeof(proto_stats)); - if (qed_fcoe_stats(cdev, &proto_stats)) { + if (qed_fcoe_stats_context(cdev, &proto_stats, is_atomic)) { DP_VERBOSE(cdev, QED_MSG_STORAGE, "Failed to collect FCoE statistics\n"); return; diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h index 19c85adf4ceb..214e8299ecb4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h +++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h @@ -28,8 +28,20 @@ int qed_fcoe_alloc(struct qed_hwfn *p_hwfn); void qed_fcoe_setup(struct qed_hwfn *p_hwfn); void qed_fcoe_free(struct qed_hwfn *p_hwfn); +/** + * qed_get_protocol_stats_fcoe(): Fills provided statistics + * struct with statistics. + * + * @cdev: Qed dev pointer. + * @stats: Points to struct that will be filled with statistics. + * @is_atomic: Hint from the caller - if the func can sleep or not. + * + * Context: The function should not sleep in case is_atomic == true. + * Return: Void. + */ void qed_get_protocol_stats_fcoe(struct qed_dev *cdev, - struct qed_mcp_fcoe_stats *stats); + struct qed_mcp_fcoe_stats *stats, + bool is_atomic); #else /* CONFIG_QED_FCOE */ static inline int qed_fcoe_alloc(struct qed_hwfn *p_hwfn) { @@ -40,7 +52,8 @@ static inline void qed_fcoe_setup(struct qed_hwfn *p_hwfn) {} static inline void qed_fcoe_free(struct qed_hwfn *p_hwfn) {} static inline void qed_get_protocol_stats_fcoe(struct qed_dev *cdev, - struct qed_mcp_fcoe_stats *stats) + struct qed_mcp_fcoe_stats *stats, + bool is_atomic) { } #endif /* CONFIG_QED_FCOE */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index 554f30b0cfd5..6263f847b6b9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -23,7 +23,10 @@ #include "qed_reg_addr.h" #include "qed_sriov.h" -#define QED_BAR_ACQUIRE_TIMEOUT 1000 +#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT 1000 +#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP 1000 +#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT 100000 +#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY 10 /* Invalid values */ #define QED_BAR_INVALID_OFFSET (cpu_to_le32(-1)) @@ -85,11 +88,21 @@ void qed_ptt_pool_free(struct qed_hwfn *p_hwfn) struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn) { + return qed_ptt_acquire_context(p_hwfn, false); +} + +struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn, bool is_atomic) +{ struct qed_ptt *p_ptt; - unsigned int i; + unsigned int i, count; + + if (is_atomic) + count = QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT; + else + count = QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT; /* Take the free PTT from the list */ - for (i = 0; i < QED_BAR_ACQUIRE_TIMEOUT; i++) { + for (i = 0; i < count; i++) { spin_lock_bh(&p_hwfn->p_ptt_pool->lock); if (!list_empty(&p_hwfn->p_ptt_pool->free_list)) { @@ -105,7 +118,12 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn) } spin_unlock_bh(&p_hwfn->p_ptt_pool->lock); - usleep_range(1000, 2000); + + if (is_atomic) + udelay(QED_BAR_ACQUIRE_TIMEOUT_UDELAY); + else + usleep_range(QED_BAR_ACQUIRE_TIMEOUT_USLEEP, + QED_BAR_ACQUIRE_TIMEOUT_USLEEP * 2); } DP_NOTICE(p_hwfn, "PTT acquire timeout - failed to allocate PTT\n"); diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c index 511ab214eb9c..980e7289b481 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c @@ -999,13 +999,14 @@ static void _qed_iscsi_get_pstats(struct qed_hwfn *p_hwfn, } static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn, - struct qed_iscsi_stats *stats) + struct qed_iscsi_stats *stats, + bool is_atomic) { struct qed_ptt *p_ptt; memset(stats, 0, sizeof(*stats)); - p_ptt = qed_ptt_acquire(p_hwfn); + p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic); if (!p_ptt) { DP_ERR(p_hwfn, "Failed to acquire ptt\n"); return -EAGAIN; @@ -1336,9 +1337,16 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev, QED_SPQ_MODE_EBLOCK, NULL); } +static int qed_iscsi_stats_context(struct qed_dev *cdev, + struct qed_iscsi_stats *stats, + bool is_atomic) +{ + return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic); +} + static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats) { - return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats); + return qed_iscsi_stats_context(cdev, stats, false); } static int qed_iscsi_change_mac(struct qed_dev *cdev, @@ -1358,13 +1366,14 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev, } void qed_get_protocol_stats_iscsi(struct qed_dev *cdev, - struct qed_mcp_iscsi_stats *stats) + struct qed_mcp_iscsi_stats *stats, + bool is_atomic) { struct qed_iscsi_stats proto_stats; /* Retrieve FW statistics */ memset(&proto_stats, 0, sizeof(proto_stats)); - if (qed_iscsi_stats(cdev, &proto_stats)) { + if (qed_iscsi_stats_context(cdev, &proto_stats, is_atomic)) { DP_VERBOSE(cdev, QED_MSG_STORAGE, "Failed to collect ISCSI statistics\n"); return; diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h index dec2b00259d4..974cb8d26608 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h @@ -39,11 +39,14 @@ void qed_iscsi_free(struct qed_hwfn *p_hwfn); * * @cdev: Qed dev pointer. * @stats: Points to struct that will be filled with statistics. + * @is_atomic: Hint from the caller - if the func can sleep or not. * + * Context: The function should not sleep in case is_atomic == true. * Return: Void. */ void qed_get_protocol_stats_iscsi(struct qed_dev *cdev, - struct qed_mcp_iscsi_stats *stats); + struct qed_mcp_iscsi_stats *stats, + bool is_atomic); #else /* IS_ENABLED(CONFIG_QED_ISCSI) */ static inline int qed_iscsi_alloc(struct qed_hwfn *p_hwfn) { @@ -56,7 +59,8 @@ static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn) {} static inline void qed_get_protocol_stats_iscsi(struct qed_dev *cdev, - struct qed_mcp_iscsi_stats *stats) {} + struct qed_mcp_iscsi_stats *stats, + bool is_atomic) {} #endif /* IS_ENABLED(CONFIG_QED_ISCSI) */ #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 7776d3bdd459..970b9aabbc3d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1863,7 +1863,8 @@ static void __qed_get_vport_stats(struct qed_hwfn *p_hwfn, } static void _qed_get_vport_stats(struct qed_dev *cdev, - struct qed_eth_stats *stats) + struct qed_eth_stats *stats, + bool is_atomic) { u8 fw_vport = 0; int i; @@ -1872,10 +1873,11 @@ static void _qed_get_vport_stats(struct qed_dev *cdev, for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; - struct qed_ptt *p_ptt = IS_PF(cdev) ? qed_ptt_acquire(p_hwfn) - : NULL; + struct qed_ptt *p_ptt; bool b_get_port_stats; + p_ptt = IS_PF(cdev) ? qed_ptt_acquire_context(p_hwfn, is_atomic) + : NULL; if (IS_PF(cdev)) { /* The main vport index is relative first */ if (qed_fw_vport(p_hwfn, 0, &fw_vport)) { @@ -1901,6 +1903,13 @@ out: void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats) { + qed_get_vport_stats_context(cdev, stats, false); +} + +void qed_get_vport_stats_context(struct qed_dev *cdev, + struct qed_eth_stats *stats, + bool is_atomic) +{ u32 i; if (!cdev || cdev->recov_in_prog) { @@ -1908,7 +1917,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats) return; } - _qed_get_vport_stats(cdev, stats); + _qed_get_vport_stats(cdev, stats, is_atomic); if (!cdev->reset_stats) return; @@ -1960,7 +1969,7 @@ void qed_reset_vport_stats(struct qed_dev *cdev) if (!cdev->reset_stats) { DP_INFO(cdev, "Reset stats not allocated\n"); } else { - _qed_get_vport_stats(cdev, cdev->reset_stats); + _qed_get_vport_stats(cdev, cdev->reset_stats, false); cdev->reset_stats->common.link_change_count = 0; } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index a538cf478c14..2d2f82c785ad 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -249,8 +249,32 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn, enum spq_mode comp_mode, struct qed_spq_comp_cb *p_comp_data); +/** + * qed_get_vport_stats(): Fills provided statistics + * struct with statistics. + * + * @cdev: Qed dev pointer. + * @stats: Points to struct that will be filled with statistics. + * + * Return: Void. + */ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats); +/** + * qed_get_vport_stats_context(): Fills provided statistics + * struct with statistics. + * + * @cdev: Qed dev pointer. + * @stats: Points to struct that will be filled with statistics. + * @is_atomic: Hint from the caller - if the func can sleep or not. + * + * Context: The function should not sleep in case is_atomic == true. + * Return: Void. + */ +void qed_get_vport_stats_context(struct qed_dev *cdev, + struct qed_eth_stats *stats, + bool is_atomic); + void qed_reset_vport_stats(struct qed_dev *cdev); /** diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index f5af83342856..c278f8893042 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -3092,7 +3092,7 @@ void qed_get_protocol_stats(struct qed_dev *cdev, switch (type) { case QED_MCP_LAN_STATS: - qed_get_vport_stats(cdev, ð_stats); + qed_get_vport_stats_context(cdev, ð_stats, true); stats->lan_stats.ucast_rx_pkts = eth_stats.common.rx_ucast_pkts; stats->lan_stats.ucast_tx_pkts = @@ -3100,10 +3100,10 @@ void qed_get_protocol_stats(struct qed_dev *cdev, stats->lan_stats.fcs_err = -1; break; case QED_MCP_FCOE_STATS: - qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats); + qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats, true); break; case QED_MCP_ISCSI_STATS: - qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats); + qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats, true); break; default: DP_VERBOSE(cdev, QED_MSG_SP, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 4b004a728190..99df00c30b8c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -176,6 +176,15 @@ static int qede_sriov_configure(struct pci_dev *pdev, int num_vfs_param) } #endif +static int __maybe_unused qede_suspend(struct device *dev) +{ + dev_info(dev, "Device does not support suspend operation\n"); + + return -EOPNOTSUPP; +} + +static DEFINE_SIMPLE_DEV_PM_OPS(qede_pm_ops, qede_suspend, NULL); + static const struct pci_error_handlers qede_err_handler = { .error_detected = qede_io_error_detected, }; @@ -190,6 +199,7 @@ static struct pci_driver qede_pci_driver = { .sriov_configure = qede_sriov_configure, #endif .err_handler = &qede_err_handler, + .driver.pm = &qede_pm_ops, }; static struct qed_eth_cb_ops qede_ll_ops = { diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 0d80447d4d3b..d5c688a8d7be 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1260,8 +1260,11 @@ static int emac_tso_csum(struct emac_adapter *adpt, if (skb->protocol == htons(ETH_P_IP)) { u32 pkt_len = ((unsigned char *)ip_hdr(skb) - skb->data) + ntohs(ip_hdr(skb)->tot_len); - if (skb->len > pkt_len) - pskb_trim(skb, pkt_len); + if (skb->len > pkt_len) { + ret = pskb_trim(skb, pkt_len); + if (unlikely(ret)) + return ret; + } } hdr_len = skb_tcp_all_headers(skb); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 4b19803a7dd0..6351a2dc13bc 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -623,6 +623,7 @@ struct rtl8169_private { int cfg9346_usage_count; unsigned supports_gmii:1; + unsigned aspm_manageable:1; dma_addr_t counters_phys_addr; struct rtl8169_counters *counters; struct rtl8169_tc_offsets tc_offset; @@ -2746,7 +2747,15 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) if (tp->mac_version < RTL_GIGA_MAC_VER_32) return; - if (enable) { + /* Don't enable ASPM in the chip if OS can't control ASPM */ + if (enable && tp->aspm_manageable) { + /* On these chip versions ASPM can even harm + * bus communication of other PCI devices. + */ + if (tp->mac_version == RTL_GIGA_MAC_VER_42 || + tp->mac_version == RTL_GIGA_MAC_VER_43) + return; + rtl_mod_config5(tp, 0, ASPM_en); rtl_mod_config2(tp, 0, ClkReqEn); @@ -4514,10 +4523,6 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) } if (napi_schedule_prep(&tp->napi)) { - rtl_unlock_config_regs(tp); - rtl_hw_aspm_clkreq_enable(tp, false); - rtl_lock_config_regs(tp); - rtl_irq_disable(tp); __napi_schedule(&tp->napi); } @@ -4577,14 +4582,9 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) work_done = rtl_rx(dev, tp, budget); - if (work_done < budget && napi_complete_done(napi, work_done)) { + if (work_done < budget && napi_complete_done(napi, work_done)) rtl_irq_enable(tp); - rtl_unlock_config_regs(tp); - rtl_hw_aspm_clkreq_enable(tp, true); - rtl_lock_config_regs(tp); - } - return work_done; } @@ -5158,12 +5158,23 @@ done: rtl_rar_set(tp, mac_addr); } +/* register is set if system vendor successfully tested ASPM 1.2 */ +static bool rtl_aspm_is_safe(struct rtl8169_private *tp) +{ + if (tp->mac_version >= RTL_GIGA_MAC_VER_61 && + r8168_mac_ocp_read(tp, 0xc0b2) & 0xf) + return true; + + return false; +} + static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { struct rtl8169_private *tp; int jumbo_max, region, rc; enum mac_version chipset; struct net_device *dev; + u32 txconfig; u16 xid; dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp)); @@ -5195,40 +5206,46 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* enable device (incl. PCI PM wakeup and hotplug setup) */ rc = pcim_enable_device(pdev); - if (rc < 0) { - dev_err(&pdev->dev, "enable failure\n"); - return rc; - } + if (rc < 0) + return dev_err_probe(&pdev->dev, rc, "enable failure\n"); if (pcim_set_mwi(pdev) < 0) dev_info(&pdev->dev, "Mem-Wr-Inval unavailable\n"); /* use first MMIO region */ region = ffs(pci_select_bars(pdev, IORESOURCE_MEM)) - 1; - if (region < 0) { - dev_err(&pdev->dev, "no MMIO resource found\n"); - return -ENODEV; - } + if (region < 0) + return dev_err_probe(&pdev->dev, -ENODEV, "no MMIO resource found\n"); rc = pcim_iomap_regions(pdev, BIT(region), KBUILD_MODNAME); - if (rc < 0) { - dev_err(&pdev->dev, "cannot remap MMIO, aborting\n"); - return rc; - } + if (rc < 0) + return dev_err_probe(&pdev->dev, rc, "cannot remap MMIO, aborting\n"); tp->mmio_addr = pcim_iomap_table(pdev)[region]; - xid = (RTL_R32(tp, TxConfig) >> 20) & 0xfcf; + txconfig = RTL_R32(tp, TxConfig); + if (txconfig == ~0U) + return dev_err_probe(&pdev->dev, -EIO, "PCI read failed\n"); + + xid = (txconfig >> 20) & 0xfcf; /* Identify chip attached to board */ chipset = rtl8169_get_mac_version(xid, tp->supports_gmii); - if (chipset == RTL_GIGA_MAC_NONE) { - dev_err(&pdev->dev, "unknown chip XID %03x, contact r8169 maintainers (see MAINTAINERS file)\n", xid); - return -ENODEV; - } - + if (chipset == RTL_GIGA_MAC_NONE) + return dev_err_probe(&pdev->dev, -ENODEV, + "unknown chip XID %03x, contact r8169 maintainers (see MAINTAINERS file)\n", + xid); tp->mac_version = chipset; + /* Disable ASPM L1 as that cause random device stop working + * problems as well as full system hangs for some PCIe devices users. + */ + if (rtl_aspm_is_safe(tp)) + rc = 0; + else + rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1); + tp->aspm_manageable = !rc; + tp->dash_type = rtl_check_dash(tp); tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK; @@ -5246,10 +5263,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl_hw_reset(tp); rc = rtl_alloc_irq(tp); - if (rc < 0) { - dev_err(&pdev->dev, "Can't allocate interrupt\n"); - return rc; - } + if (rc < 0) + return dev_err_probe(&pdev->dev, rc, "Can't allocate interrupt\n"); + tp->irq = pci_irq_vector(pdev, 0); INIT_WORK(&tp->wk.work, rtl_task); diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index fa6d6202b129..449ed1f5624c 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -90,6 +90,11 @@ static int rswitch_bpool_config(struct rswitch_private *priv) return rswitch_reg_wait(priv->addr, CABPIRM, CABPIRM_BPR, CABPIRM_BPR); } +static void rswitch_coma_init(struct rswitch_private *priv) +{ + iowrite32(CABPPFLC_INIT_VALUE, priv->addr + CABPPFLC0); +} + /* R-Switch-2 block (TOP) */ static void rswitch_top_init(struct rswitch_private *priv) { @@ -156,24 +161,6 @@ static int rswitch_gwca_axi_ram_reset(struct rswitch_private *priv) return rswitch_reg_wait(priv->addr, GWARIRM, GWARIRM_ARR, GWARIRM_ARR); } -static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv, int rate) -{ - u32 gwgrlulc, gwgrlc; - - switch (rate) { - case 1000: - gwgrlulc = 0x0000005f; - gwgrlc = 0x00010260; - break; - default: - dev_err(&priv->pdev->dev, "%s: This rate is not supported (%d)\n", __func__, rate); - return; - } - - iowrite32(gwgrlulc, priv->addr + GWGRLULC); - iowrite32(gwgrlc, priv->addr + GWGRLC); -} - static bool rswitch_is_any_data_irq(struct rswitch_private *priv, u32 *dis, bool tx) { u32 *mask = tx ? priv->gwca.tx_irq_bits : priv->gwca.rx_irq_bits; @@ -391,7 +378,7 @@ static int rswitch_gwca_queue_format(struct net_device *ndev, linkfix->die_dt = DT_LINKFIX; rswitch_desc_set_dptr(linkfix, gq->ring_dma); - iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DQT : 0) | GWDCC_EDE, + iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DCP(GWCA_IPV_NUM) | GWDCC_DQT : 0) | GWDCC_EDE, priv->addr + GWDCC_OFFS(gq->index)); return 0; @@ -489,7 +476,8 @@ static int rswitch_gwca_queue_ext_ts_format(struct net_device *ndev, linkfix->die_dt = DT_LINKFIX; rswitch_desc_set_dptr(linkfix, gq->ring_dma); - iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DQT : 0) | GWDCC_ETS | GWDCC_EDE, + iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DCP(GWCA_IPV_NUM) | GWDCC_DQT : 0) | + GWDCC_ETS | GWDCC_EDE, priv->addr + GWDCC_OFFS(gq->index)); return 0; @@ -660,7 +648,8 @@ static int rswitch_gwca_hw_init(struct rswitch_private *priv) iowrite32(lower_32_bits(priv->gwca.ts_queue.ring_dma), priv->addr + GWTDCAC10); iowrite32(upper_32_bits(priv->gwca.ts_queue.ring_dma), priv->addr + GWTDCAC00); iowrite32(GWCA_TS_IRQ_BIT, priv->addr + GWTSDCC0); - rswitch_gwca_set_rate_limit(priv, priv->gwca.speed); + + iowrite32(GWTPC_PPPL(GWCA_IPV_NUM), priv->addr + GWTPC0); for (i = 0; i < RSWITCH_NUM_PORTS; i++) { err = rswitch_rxdmac_init(priv, i); @@ -740,7 +729,7 @@ static bool rswitch_rx(struct net_device *ndev, int *quota) } skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, ndev); - netif_receive_skb(skb); + napi_gro_receive(&rdev->napi, skb); rdev->ndev->stats.rx_packets++; rdev->ndev->stats.rx_bytes += pkt_len; @@ -827,10 +816,10 @@ retry: netif_wake_subqueue(ndev, 0); - napi_complete(napi); - - rswitch_enadis_data_irq(priv, rdev->tx_queue->index, true); - rswitch_enadis_data_irq(priv, rdev->rx_queue->index, true); + if (napi_complete_done(napi, budget - quota)) { + rswitch_enadis_data_irq(priv, rdev->tx_queue->index, true); + rswitch_enadis_data_irq(priv, rdev->rx_queue->index, true); + } out: return budget - quota; @@ -1513,7 +1502,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd rswitch_desc_set_dptr(&desc->desc, dma_addr); desc->desc.info_ds = cpu_to_le16(skb->len); - desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) | INFO1_FMT); + desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) | + INFO1_IPV(GWCA_IPV_NUM) | INFO1_FMT); if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { struct rswitch_gwca_ts_info *ts_info; @@ -1783,6 +1773,8 @@ static int rswitch_init(struct rswitch_private *priv) if (err < 0) return err; + rswitch_coma_init(priv); + err = rswitch_gwca_linkfix_alloc(priv); if (err < 0) return -ENOMEM; diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index b3e0411b408e..bb9ed971a97c 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -48,6 +48,7 @@ #define GWCA_NUM_IRQS 8 #define GWCA_INDEX 0 #define AGENT_INDEX_GWCA 3 +#define GWCA_IPV_NUM 0 #define GWRO RSWITCH_GWCA0_OFFSET #define GWCA_TS_IRQ_RESOURCE_NAME "gwca0_rxts0" @@ -768,11 +769,14 @@ enum rswitch_gwca_mode { #define GWARIRM_ARR BIT(1) #define GWDCC_BALR BIT(24) +#define GWDCC_DCP_MASK GENMASK(18, 16) +#define GWDCC_DCP(prio) FIELD_PREP(GWDCC_DCP_MASK, (prio)) #define GWDCC_DQT BIT(11) #define GWDCC_ETS BIT(9) #define GWDCC_EDE BIT(8) #define GWTRC(queue) (GWTRC0 + (queue) / 32 * 4) +#define GWTPC_PPPL(ipv) BIT(ipv) #define GWDCC_OFFS(queue) (GWDCC0 + (queue) * 4) #define GWDIS(i) (GWDIS0 + (i) * 0x10) @@ -789,6 +793,8 @@ enum rswitch_gwca_mode { #define CABPIRM_BPIOG BIT(0) #define CABPIRM_BPR BIT(1) +#define CABPPFLC_INIT_VALUE 0x00800080 + /* MFWD */ #define FWPC0_LTHTA BIT(0) #define FWPC0_IP4UE BIT(3) @@ -863,6 +869,7 @@ enum DIE_DT { /* For transmission */ #define INFO1_TSUN(val) ((u64)(val) << 8ULL) +#define INFO1_IPV(prio) ((u64)(prio) << 28ULL) #define INFO1_CSD0(index) ((u64)(index) << 32ULL) #define INFO1_CSD1(index) ((u64)(index) << 40ULL) #define INFO1_DV(port_vector) ((u64)(port_vector) << 48ULL) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h index 0f45107db8dd..d14e0cfc3a6b 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h @@ -511,7 +511,7 @@ struct sxgbe_priv_data { struct sxgbe_priv_data *sxgbe_drv_probe(struct device *device, struct sxgbe_plat_data *plat_dat, void __iomem *addr); -int sxgbe_drv_remove(struct net_device *ndev); +void sxgbe_drv_remove(struct net_device *ndev); void sxgbe_set_ethtool_ops(struct net_device *netdev); int sxgbe_mdio_unregister(struct net_device *ndev); int sxgbe_mdio_register(struct net_device *ndev); diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 9664f029fa16..71439825ea4e 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -2203,7 +2203,7 @@ error_free_netdev: * Description: this function resets the TX/RX processes, disables the MAC RX/TX * changes the link status, releases the DMA descriptor rings. */ -int sxgbe_drv_remove(struct net_device *ndev) +void sxgbe_drv_remove(struct net_device *ndev) { struct sxgbe_priv_data *priv = netdev_priv(ndev); u8 queue_num; @@ -2231,8 +2231,6 @@ int sxgbe_drv_remove(struct net_device *ndev) kfree(priv->hw); free_netdev(ndev); - - return 0; } #ifdef CONFIG_PM diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c index 4e5526303f07..fb59ff94509a 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c @@ -172,9 +172,10 @@ err_out: static int sxgbe_platform_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); - int ret = sxgbe_drv_remove(ndev); - return ret; + sxgbe_drv_remove(ndev); + + return 0; } #ifdef CONFIG_PM diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index 4af36ba8906b..3eb55dcfa8a6 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -50,6 +50,7 @@ config SFC_MCDI_MON config SFC_SRIOV bool "Solarflare SFC9100-family SR-IOV support" depends on SFC && PCI_IOV + depends on INET default y help This enables support for the Single Root I/O Virtualization diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index 55b9c73cd8ef..16293b58e0a8 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -10,7 +10,8 @@ sfc-y += efx.o efx_common.o efx_channels.o nic.o \ efx_devlink.o sfc-$(CONFIG_SFC_MTD) += mtd.o sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o \ - mae.o tc.o tc_bindings.o tc_counters.o + mae.o tc.o tc_bindings.o tc_counters.o \ + tc_encap_actions.o obj-$(CONFIG_SFC) += sfc.o diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index b63e47af6365..8c019f382a7f 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1297,8 +1297,10 @@ static void efx_ef10_fini_nic(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; + spin_lock_bh(&efx->stats_lock); kfree(nic_data->mc_stats); nic_data->mc_stats = NULL; + spin_unlock_bh(&efx->stats_lock); } static int efx_ef10_init_nic(struct efx_nic *efx) @@ -1852,9 +1854,14 @@ static size_t efx_ef10_update_stats_pf(struct efx_nic *efx, u64 *full_stats, efx_ef10_get_stat_mask(efx, mask); - efx_nic_copy_stats(efx, nic_data->mc_stats); - efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, - mask, stats, nic_data->mc_stats, false); + /* If NIC was fini'd (probably resetting), then we can't read + * updated stats right now. + */ + if (nic_data->mc_stats) { + efx_nic_copy_stats(efx, nic_data->mc_stats); + efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, + mask, stats, nic_data->mc_stats, false); + } /* Update derived statistics */ efx_nic_fix_nodesc_drop_stat(efx, diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c index be395cd8770b..7f7d560cb2b4 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.c +++ b/drivers/net/ethernet/sfc/ef100_netdev.c @@ -24,6 +24,7 @@ #include "rx_common.h" #include "ef100_sriov.h" #include "tc_bindings.h" +#include "tc_encap_actions.h" #include "efx_devlink.h" static void ef100_update_name(struct efx_nic *efx) @@ -40,19 +41,26 @@ static int ef100_alloc_vis(struct efx_nic *efx, unsigned int *allocated_vis) unsigned int tx_vis = efx->n_tx_channels + efx->n_extra_tx_channels; unsigned int rx_vis = efx->n_rx_channels; unsigned int min_vis, max_vis; + int rc; EFX_WARN_ON_PARANOID(efx->tx_queues_per_channel != 1); tx_vis += efx->n_xdp_channels * efx->xdp_tx_per_channel; max_vis = max(rx_vis, tx_vis); - /* Currently don't handle resource starvation and only accept - * our maximum needs and no less. + /* We require at least a single complete TX channel worth of queues. */ + min_vis = efx->tx_queues_per_channel; + + rc = efx_mcdi_alloc_vis(efx, min_vis, max_vis, + NULL, allocated_vis); + + /* We retry allocating VIs by reallocating channels when we have not + * been able to allocate the maximum VIs. */ - min_vis = max_vis; + if (!rc && *allocated_vis < max_vis) + rc = -EAGAIN; - return efx_mcdi_alloc_vis(efx, min_vis, max_vis, - NULL, allocated_vis); + return rc; } static int ef100_remap_bar(struct efx_nic *efx, int max_vis) @@ -133,9 +141,41 @@ static int ef100_net_open(struct net_device *net_dev) goto fail; rc = ef100_alloc_vis(efx, &allocated_vis); - if (rc) + if (rc && rc != -EAGAIN) goto fail; + /* Try one more time but with the maximum number of channels + * equal to the allocated VIs, which would more likely succeed. + */ + if (rc == -EAGAIN) { + rc = efx_mcdi_free_vis(efx); + if (rc) + goto fail; + + efx_remove_interrupts(efx); + efx->max_channels = allocated_vis; + + rc = efx_probe_interrupts(efx); + if (rc) + goto fail; + + rc = efx_set_channels(efx); + if (rc) + goto fail; + + rc = ef100_alloc_vis(efx, &allocated_vis); + if (rc && rc != -EAGAIN) + goto fail; + + /* It should be very unlikely that we failed here again, but in + * such a case we return ENOSPC. + */ + if (rc == -EAGAIN) { + rc = -ENOSPC; + goto fail; + } + } + rc = efx_probe_channels(efx); if (rc) return rc; @@ -261,14 +301,38 @@ int ef100_netdev_event(struct notifier_block *this, { struct efx_nic *efx = container_of(this, struct efx_nic, netdev_notifier); struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); + struct ef100_nic_data *nic_data = efx->nic_data; + int err; if (efx->net_dev == net_dev && (event == NETDEV_CHANGENAME || event == NETDEV_REGISTER)) ef100_update_name(efx); + if (!nic_data->grp_mae) + return NOTIFY_DONE; + err = efx_tc_netdev_event(efx, event, net_dev); + if (err & NOTIFY_STOP_MASK) + return err; + return NOTIFY_DONE; } +static int ef100_netevent_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct efx_nic *efx = container_of(this, struct efx_nic, netevent_notifier); + struct ef100_nic_data *nic_data = efx->nic_data; + int err; + + if (!nic_data->grp_mae) + return NOTIFY_DONE; + err = efx_tc_netevent_event(efx, event, ptr); + if (err & NOTIFY_STOP_MASK) + return err; + + return NOTIFY_DONE; +}; + static int ef100_register_netdev(struct efx_nic *efx) { struct net_device *net_dev = efx->net_dev; @@ -328,6 +392,7 @@ void ef100_remove_netdev(struct efx_probe_data *probe_data) rtnl_unlock(); unregister_netdevice_notifier(&efx->netdev_notifier); + unregister_netevent_notifier(&efx->netevent_notifier); #if defined(CONFIG_SFC_SRIOV) if (!efx->type->is_vf) efx_ef100_pci_sriov_disable(efx, true); @@ -448,6 +513,14 @@ int ef100_probe_netdev(struct efx_probe_data *probe_data) goto fail; } + efx->netevent_notifier.notifier_call = ef100_netevent_event; + rc = register_netevent_notifier(&efx->netevent_notifier); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "Failed to register netevent notifier, rc=%d\n", rc); + goto fail; + } + efx_probe_devlink_unlock(efx); return rc; fail: diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 7adde9639c8a..35d8e9811998 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -1194,7 +1194,7 @@ int ef100_probe_netdev_pf(struct efx_nic *efx) net_dev->features |= NETIF_F_HW_TC; efx->fixed_features |= NETIF_F_HW_TC; } - return rc; + return 0; } int ef100_probe_vf(struct efx_nic *efx) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index a4f22d8e6ac7..d670a319b379 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -32,6 +32,7 @@ #include "io.h" #include "selftest.h" #include "sriov.h" +#include "efx_devlink.h" #include "mcdi_port_common.h" #include "mcdi_pcol.h" @@ -877,6 +878,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev) if (efx->type->sriov_fini) efx->type->sriov_fini(efx); + efx_fini_devlink_lock(efx); efx_unregister_netdev(efx); efx_mtd_remove(efx); @@ -886,6 +888,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev) efx_fini_io(efx); pci_dbg(efx->pci_dev, "shutdown successful\n"); + efx_fini_devlink_and_unlock(efx); efx_fini_struct(efx); free_netdev(efx->net_dev); probe_data = container_of(efx, struct efx_probe_data, efx); @@ -1025,7 +1028,13 @@ static int efx_pci_probe_post_io(struct efx_nic *efx) NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_NDO_XMIT; + /* devlink creation, registration and lock */ + rc = efx_probe_devlink_and_lock(efx); + if (rc) + pci_err(efx->pci_dev, "devlink registration failed"); + rc = efx_register_netdev(efx); + efx_probe_devlink_unlock(efx); if (!rc) return 0; diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c index ef9971cbb695..3cd750820fdd 100644 --- a/drivers/net/ethernet/sfc/efx_devlink.c +++ b/drivers/net/ethernet/sfc/efx_devlink.c @@ -25,40 +25,6 @@ struct efx_devlink { }; #ifdef CONFIG_SFC_SRIOV -static void efx_devlink_del_port(struct devlink_port *dl_port) -{ - if (!dl_port) - return; - devl_port_unregister(dl_port); -} - -static int efx_devlink_add_port(struct efx_nic *efx, - struct mae_mport_desc *mport) -{ - bool external = false; - - if (!ef100_mport_on_local_intf(efx, mport)) - external = true; - - switch (mport->mport_type) { - case MAE_MPORT_DESC_MPORT_TYPE_VNIC: - if (mport->vf_idx != MAE_MPORT_DESC_VF_IDX_NULL) - devlink_port_attrs_pci_vf_set(&mport->dl_port, 0, mport->pf_idx, - mport->vf_idx, - external); - else - devlink_port_attrs_pci_pf_set(&mport->dl_port, 0, mport->pf_idx, - external); - break; - default: - /* MAE_MPORT_DESC_MPORT_ALIAS and UNDEFINED */ - return 0; - } - - mport->dl_port.index = mport->mport_id; - - return devl_port_register(efx->devlink, &mport->dl_port, mport->mport_id); -} static int efx_devlink_port_addr_get(struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, @@ -158,6 +124,48 @@ static int efx_devlink_port_addr_set(struct devlink_port *port, return rc; } +static const struct devlink_port_ops sfc_devlink_port_ops = { + .port_fn_hw_addr_get = efx_devlink_port_addr_get, + .port_fn_hw_addr_set = efx_devlink_port_addr_set, +}; + +static void efx_devlink_del_port(struct devlink_port *dl_port) +{ + if (!dl_port) + return; + devl_port_unregister(dl_port); +} + +static int efx_devlink_add_port(struct efx_nic *efx, + struct mae_mport_desc *mport) +{ + bool external = false; + + if (!ef100_mport_on_local_intf(efx, mport)) + external = true; + + switch (mport->mport_type) { + case MAE_MPORT_DESC_MPORT_TYPE_VNIC: + if (mport->vf_idx != MAE_MPORT_DESC_VF_IDX_NULL) + devlink_port_attrs_pci_vf_set(&mport->dl_port, 0, mport->pf_idx, + mport->vf_idx, + external); + else + devlink_port_attrs_pci_pf_set(&mport->dl_port, 0, mport->pf_idx, + external); + break; + default: + /* MAE_MPORT_DESC_MPORT_ALIAS and UNDEFINED */ + return 0; + } + + mport->dl_port.index = mport->mport_id; + + return devl_port_register_with_ops(efx->devlink, &mport->dl_port, + mport->mport_id, + &sfc_devlink_port_ops); +} + #endif static int efx_devlink_info_nvram_partition(struct efx_nic *efx, @@ -609,10 +617,6 @@ static int efx_devlink_info_get(struct devlink *devlink, static const struct devlink_ops sfc_devlink_ops = { .info_get = efx_devlink_info_get, -#ifdef CONFIG_SFC_SRIOV - .port_function_hw_addr_get = efx_devlink_port_addr_get, - .port_function_hw_addr_set = efx_devlink_port_addr_set, -#endif }; #ifdef CONFIG_SFC_SRIOV @@ -622,6 +626,9 @@ static struct devlink_port *ef100_set_devlink_port(struct efx_nic *efx, u32 idx) u32 id; int rc; + if (!efx->mae) + return NULL; + if (efx_mae_lookup_mport(efx, idx, &id)) { /* This should not happen. */ if (idx == MAE_MPORT_DESC_VF_IDX_NULL) diff --git a/drivers/net/ethernet/sfc/falcon/selftest.c b/drivers/net/ethernet/sfc/falcon/selftest.c index 6a454ac6f876..c3dc88e6c26c 100644 --- a/drivers/net/ethernet/sfc/falcon/selftest.c +++ b/drivers/net/ethernet/sfc/falcon/selftest.c @@ -39,12 +39,17 @@ * Falcon only performs RSS on TCP/UDP packets. */ struct ef4_loopback_payload { - struct ethhdr header; - struct iphdr ip; - struct udphdr udp; - __be16 iteration; - char msg[64]; -} __packed; + char pad[2]; /* Ensures ip is 4-byte aligned */ + struct_group_attr(packet, __packed, + struct ethhdr header; + struct iphdr ip; + struct udphdr udp; + __be16 iteration; + char msg[64]; + ); +} __packed __aligned(4); +#define EF4_LOOPBACK_PAYLOAD_LEN \ + sizeof_field(struct ef4_loopback_payload, packet) /* Loopback test source MAC address */ static const u8 payload_source[ETH_ALEN] __aligned(2) = { @@ -284,7 +289,7 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, const char *buf_ptr, int pkt_len) { struct ef4_loopback_state *state = efx->loopback_selftest; - struct ef4_loopback_payload *received; + struct ef4_loopback_payload received; struct ef4_loopback_payload *payload; BUG_ON(!buf_ptr); @@ -295,13 +300,14 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, payload = &state->payload; - received = (struct ef4_loopback_payload *) buf_ptr; - received->ip.saddr = payload->ip.saddr; + memcpy(&received.packet, buf_ptr, + min_t(int, pkt_len, EF4_LOOPBACK_PAYLOAD_LEN)); + received.ip.saddr = payload->ip.saddr; if (state->offload_csum) - received->ip.check = payload->ip.check; + received.ip.check = payload->ip.check; /* Check that header exists */ - if (pkt_len < sizeof(received->header)) { + if (pkt_len < sizeof(received.header)) { netif_err(efx, drv, efx->net_dev, "saw runt RX packet (length %d) in %s loopback " "test\n", pkt_len, LOOPBACK_MODE(efx)); @@ -309,7 +315,7 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, } /* Check that the ethernet header exists */ - if (memcmp(&received->header, &payload->header, ETH_HLEN) != 0) { + if (memcmp(&received.header, &payload->header, ETH_HLEN) != 0) { netif_err(efx, drv, efx->net_dev, "saw non-loopback RX packet in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -317,16 +323,16 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, } /* Check packet length */ - if (pkt_len != sizeof(*payload)) { + if (pkt_len != EF4_LOOPBACK_PAYLOAD_LEN) { netif_err(efx, drv, efx->net_dev, "saw incorrect RX packet length %d (wanted %d) in " - "%s loopback test\n", pkt_len, (int)sizeof(*payload), - LOOPBACK_MODE(efx)); + "%s loopback test\n", pkt_len, + (int)EF4_LOOPBACK_PAYLOAD_LEN, LOOPBACK_MODE(efx)); goto err; } /* Check that IP header matches */ - if (memcmp(&received->ip, &payload->ip, sizeof(payload->ip)) != 0) { + if (memcmp(&received.ip, &payload->ip, sizeof(payload->ip)) != 0) { netif_err(efx, drv, efx->net_dev, "saw corrupted IP header in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -334,7 +340,7 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, } /* Check that msg and padding matches */ - if (memcmp(&received->msg, &payload->msg, sizeof(received->msg)) != 0) { + if (memcmp(&received.msg, &payload->msg, sizeof(received.msg)) != 0) { netif_err(efx, drv, efx->net_dev, "saw corrupted RX packet in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -342,10 +348,10 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, } /* Check that iteration matches */ - if (received->iteration != payload->iteration) { + if (received.iteration != payload->iteration) { netif_err(efx, drv, efx->net_dev, "saw RX packet from iteration %d (wanted %d) in " - "%s loopback test\n", ntohs(received->iteration), + "%s loopback test\n", ntohs(received.iteration), ntohs(payload->iteration), LOOPBACK_MODE(efx)); goto err; } @@ -365,7 +371,8 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, buf_ptr, pkt_len, 0); netif_err(efx, drv, efx->net_dev, "expected packet:\n"); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1, - &state->payload, sizeof(state->payload), 0); + &state->payload.packet, EF4_LOOPBACK_PAYLOAD_LEN, + 0); } #endif atomic_inc(&state->rx_bad); @@ -387,14 +394,15 @@ static void ef4_iterate_state(struct ef4_nic *efx) payload->ip.daddr = htonl(INADDR_LOOPBACK); payload->ip.ihl = 5; payload->ip.check = (__force __sum16) htons(0xdead); - payload->ip.tot_len = htons(sizeof(*payload) - sizeof(struct ethhdr)); + payload->ip.tot_len = htons(sizeof(*payload) - + offsetof(struct ef4_loopback_payload, ip)); payload->ip.version = IPVERSION; payload->ip.protocol = IPPROTO_UDP; /* Initialise udp header */ payload->udp.source = 0; - payload->udp.len = htons(sizeof(*payload) - sizeof(struct ethhdr) - - sizeof(struct iphdr)); + payload->udp.len = htons(sizeof(*payload) - + offsetof(struct ef4_loopback_payload, udp)); payload->udp.check = 0; /* checksum ignored */ /* Fill out payload */ @@ -431,6 +439,10 @@ static int ef4_begin_loopback(struct ef4_tx_queue *tx_queue) payload = skb_put(skb, sizeof(state->payload)); memcpy(payload, &state->payload, sizeof(state->payload)); payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2)); + /* Strip off the leading padding */ + skb_pull(skb, offsetof(struct ef4_loopback_payload, header)); + /* Strip off the trailing padding */ + skb_trim(skb, EF4_LOOPBACK_PAYLOAD_LEN); /* Ensure everything we've written is visible to the * interrupt handler. */ diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 49706a7b94bf..0cab508f2f9d 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -15,6 +15,7 @@ #include "mcdi.h" #include "mcdi_pcol.h" #include "mcdi_pcol_mae.h" +#include "tc_encap_actions.h" int efx_mae_allocate_mport(struct efx_nic *efx, u32 *id, u32 *label) { @@ -482,12 +483,14 @@ int efx_mae_match_check_caps(struct efx_nic *efx, rc; \ }) /* Checks that the fields needed for encap-rule matches are supported by the - * MAE. All the fields are exact-match. + * MAE. All the fields are exact-match, except possibly ENC_IP_TOS. */ int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6, + u8 ip_tos_mask, __be16 udp_sport_mask, struct netlink_ext_ack *extack) { u8 *supported_fields = efx->tc->caps->outer_rule_fields; + enum mask_type typ; int rc; if (CHECK(ENC_ETHER_TYPE)) @@ -504,6 +507,22 @@ int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6, if (CHECK(ENC_L4_DPORT) || CHECK(ENC_IP_PROTO)) return rc; + typ = classify_mask((const u8 *)&udp_sport_mask, sizeof(udp_sport_mask)); + rc = efx_mae_match_check_cap_typ(supported_fields[MAE_FIELD_ENC_L4_SPORT], + typ); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, "No support for %s mask in field %s", + mask_type_name(typ), "enc_src_port"); + return rc; + } + typ = classify_mask(&ip_tos_mask, sizeof(ip_tos_mask)); + rc = efx_mae_match_check_cap_typ(supported_fields[MAE_FIELD_ENC_IP_TOS], + typ); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, "No support for %s mask in field %s", + mask_type_name(typ), "enc_ip_tos"); + return rc; + } return 0; } #undef CHECK @@ -592,6 +611,87 @@ static int efx_mae_encap_type_to_mae_type(enum efx_encap_type type) } } +int efx_mae_allocate_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ENCAP_HEADER_ALLOC_IN_LEN(EFX_TC_MAX_ENCAP_HDR)); + MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_LEN); + size_t inlen, outlen; + int rc; + + rc = efx_mae_encap_type_to_mae_type(encap->type); + if (rc < 0) + return rc; + MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_ALLOC_IN_ENCAP_TYPE, rc); + inlen = MC_CMD_MAE_ENCAP_HEADER_ALLOC_IN_LEN(encap->encap_hdr_len); + if (WARN_ON(inlen > sizeof(inbuf))) /* can't happen */ + return -EINVAL; + memcpy(MCDI_PTR(inbuf, MAE_ENCAP_HEADER_ALLOC_IN_HDR_DATA), + encap->encap_hdr, + encap->encap_hdr_len); + rc = efx_mcdi_rpc(efx, MC_CMD_MAE_ENCAP_HEADER_ALLOC, inbuf, + inlen, outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < sizeof(outbuf)) + return -EIO; + encap->fw_id = MCDI_DWORD(outbuf, MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID); + return 0; +} + +int efx_mae_update_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ENCAP_HEADER_UPDATE_IN_LEN(EFX_TC_MAX_ENCAP_HDR)); + size_t inlen; + int rc; + + rc = efx_mae_encap_type_to_mae_type(encap->type); + if (rc < 0) + return rc; + MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_UPDATE_IN_ENCAP_TYPE, rc); + MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_UPDATE_IN_EH_ID, + encap->fw_id); + inlen = MC_CMD_MAE_ENCAP_HEADER_UPDATE_IN_LEN(encap->encap_hdr_len); + if (WARN_ON(inlen > sizeof(inbuf))) /* can't happen */ + return -EINVAL; + memcpy(MCDI_PTR(inbuf, MAE_ENCAP_HEADER_UPDATE_IN_HDR_DATA), + encap->encap_hdr, + encap->encap_hdr_len); + + BUILD_BUG_ON(MC_CMD_MAE_ENCAP_HEADER_UPDATE_OUT_LEN != 0); + return efx_mcdi_rpc(efx, MC_CMD_MAE_ENCAP_HEADER_UPDATE, inbuf, + inlen, NULL, 0, NULL); +} + +int efx_mae_free_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ENCAP_HEADER_FREE_OUT_LEN(1)); + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ENCAP_HEADER_FREE_IN_LEN(1)); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_FREE_IN_EH_ID, encap->fw_id); + rc = efx_mcdi_rpc(efx, MC_CMD_MAE_ENCAP_HEADER_FREE, inbuf, + sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < sizeof(outbuf)) + return -EIO; + /* FW freed a different ID than we asked for, should also never happen. + * Warn because it means we've now got a different idea to the FW of + * what encap_mds exist, which could cause mayhem later. + */ + if (WARN_ON(MCDI_DWORD(outbuf, MAE_ENCAP_HEADER_FREE_OUT_FREED_EH_ID) != encap->fw_id)) + return -EIO; + /* We're probably about to free @encap, but let's just make sure its + * fw_id is blatted so that it won't look valid if it leaks out. + */ + encap->fw_id = MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL; + return 0; +} + int efx_mae_lookup_mport(struct efx_nic *efx, u32 vf_idx, u32 *id) { struct ef100_nic_data *nic_data = efx->nic_data; @@ -815,8 +915,12 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act) MCDI_SET_WORD_BE(inbuf, MAE_ACTION_SET_ALLOC_IN_VLAN1_PROTO_BE, act->vlan_proto[1]); } - MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID, - MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL); + if (act->encap_md) + MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID, + act->encap_md->fw_id); + else + MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID, + MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL); if (act->deliver) MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DELIVER, act->dest_mport); @@ -1001,8 +1105,16 @@ int efx_mae_register_encap_match(struct efx_nic *efx, encap->udp_dport); MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE_MASK, ~(__be16)0); + MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE, + encap->udp_sport); + MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE_MASK, + encap->udp_sport_mask); MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO, IPPROTO_UDP); MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO_MASK, ~0); + MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_TOS, + encap->ip_tos); + MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_TOS_MASK, + encap->ip_tos_mask); rc = efx_mcdi_rpc(efx, MC_CMD_MAE_OUTER_RULE_INSERT, inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); if (rc) @@ -1203,6 +1315,29 @@ int efx_mae_insert_rule(struct efx_nic *efx, const struct efx_tc_match *match, return 0; } +int efx_mae_update_rule(struct efx_nic *efx, u32 acts_id, u32 id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ACTION_RULE_UPDATE_IN_LEN); + MCDI_DECLARE_STRUCT_PTR(response); + + BUILD_BUG_ON(MC_CMD_MAE_ACTION_RULE_UPDATE_OUT_LEN); + response = _MCDI_DWORD(inbuf, MAE_ACTION_RULE_UPDATE_IN_RESPONSE); + + MCDI_SET_DWORD(inbuf, MAE_ACTION_RULE_UPDATE_IN_AR_ID, id); + if (efx_mae_asl_id(acts_id)) { + MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_ASL_ID, acts_id); + MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_AS_ID, + MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL); + } else { + /* We only had one AS, so we didn't wrap it in an ASL */ + MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_ASL_ID, + MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL); + MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_AS_ID, acts_id); + } + return efx_mcdi_rpc(efx, MC_CMD_MAE_ACTION_RULE_UPDATE, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + int efx_mae_delete_rule(struct efx_nic *efx, u32 id) { MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ACTION_RULE_DELETE_OUT_LEN(1)); diff --git a/drivers/net/ethernet/sfc/mae.h b/drivers/net/ethernet/sfc/mae.h index 9226219491a0..24abfe509690 100644 --- a/drivers/net/ethernet/sfc/mae.h +++ b/drivers/net/ethernet/sfc/mae.h @@ -82,6 +82,7 @@ int efx_mae_match_check_caps(struct efx_nic *efx, const struct efx_tc_match_fields *mask, struct netlink_ext_ack *extack); int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6, + u8 ip_tos_mask, __be16 udp_sport_mask, struct netlink_ext_ack *extack); int efx_mae_check_encap_type_supported(struct efx_nic *efx, enum efx_encap_type typ); @@ -89,6 +90,13 @@ int efx_mae_check_encap_type_supported(struct efx_nic *efx, int efx_mae_allocate_counter(struct efx_nic *efx, struct efx_tc_counter *cnt); int efx_mae_free_counter(struct efx_nic *efx, struct efx_tc_counter *cnt); +int efx_mae_allocate_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap); +int efx_mae_update_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap); +int efx_mae_free_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap); + int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act); int efx_mae_free_action_set(struct efx_nic *efx, u32 fw_id); @@ -104,6 +112,7 @@ int efx_mae_unregister_encap_match(struct efx_nic *efx, int efx_mae_insert_rule(struct efx_nic *efx, const struct efx_tc_match *match, u32 prio, u32 acts_id, u32 *id); +int efx_mae_update_rule(struct efx_nic *efx, u32 acts_id, u32 id); int efx_mae_delete_rule(struct efx_nic *efx, u32 id); int efx_init_mae(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index fcd51d3992fa..a7a22b019794 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -27,6 +27,7 @@ #include <linux/mtd/mtd.h> #include <net/busy_poll.h> #include <net/xdp.h> +#include <net/netevent.h> #include "enum.h" #include "bitfield.h" @@ -996,6 +997,7 @@ struct efx_mae; * @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their * xdp_rxq_info structures? * @netdev_notifier: Netdevice notifier. + * @netevent_notifier: Netevent notifier (for neighbour updates). * @tc: state for TC offload (EF100). * @devlink: reference to devlink structure owned by this device * @dl_port: devlink port associated with the PF @@ -1183,6 +1185,7 @@ struct efx_nic { bool xdp_rxq_info_failed; struct notifier_block netdev_notifier; + struct notifier_block netevent_notifier; struct efx_tc_state *tc; struct devlink *devlink; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 0c40571133cb..00cf6de3bb2b 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1485,7 +1485,9 @@ static int efx_ptp_insert_multicast_filters(struct efx_nic *efx) goto fail; rc = efx_ptp_insert_eth_multicast_filter(efx); - if (rc < 0) + + /* Not all firmware variants support this filter */ + if (rc < 0 && rc != -EPROTONOSUPPORT) goto fail; } diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c index 3c5227afd497..563c1e317ce9 100644 --- a/drivers/net/ethernet/sfc/selftest.c +++ b/drivers/net/ethernet/sfc/selftest.c @@ -42,12 +42,17 @@ * Falcon only performs RSS on TCP/UDP packets. */ struct efx_loopback_payload { - struct ethhdr header; - struct iphdr ip; - struct udphdr udp; - __be16 iteration; - char msg[64]; -} __packed; + char pad[2]; /* Ensures ip is 4-byte aligned */ + struct_group_attr(packet, __packed, + struct ethhdr header; + struct iphdr ip; + struct udphdr udp; + __be16 iteration; + char msg[64]; + ); +} __packed __aligned(4); +#define EFX_LOOPBACK_PAYLOAD_LEN \ + sizeof_field(struct efx_loopback_payload, packet) /* Loopback test source MAC address */ static const u8 payload_source[ETH_ALEN] __aligned(2) = { @@ -282,7 +287,7 @@ void efx_loopback_rx_packet(struct efx_nic *efx, const char *buf_ptr, int pkt_len) { struct efx_loopback_state *state = efx->loopback_selftest; - struct efx_loopback_payload *received; + struct efx_loopback_payload received; struct efx_loopback_payload *payload; BUG_ON(!buf_ptr); @@ -293,13 +298,14 @@ void efx_loopback_rx_packet(struct efx_nic *efx, payload = &state->payload; - received = (struct efx_loopback_payload *) buf_ptr; - received->ip.saddr = payload->ip.saddr; + memcpy(&received.packet, buf_ptr, + min_t(int, pkt_len, EFX_LOOPBACK_PAYLOAD_LEN)); + received.ip.saddr = payload->ip.saddr; if (state->offload_csum) - received->ip.check = payload->ip.check; + received.ip.check = payload->ip.check; /* Check that header exists */ - if (pkt_len < sizeof(received->header)) { + if (pkt_len < sizeof(received.header)) { netif_err(efx, drv, efx->net_dev, "saw runt RX packet (length %d) in %s loopback " "test\n", pkt_len, LOOPBACK_MODE(efx)); @@ -307,7 +313,7 @@ void efx_loopback_rx_packet(struct efx_nic *efx, } /* Check that the ethernet header exists */ - if (memcmp(&received->header, &payload->header, ETH_HLEN) != 0) { + if (memcmp(&received.header, &payload->header, ETH_HLEN) != 0) { netif_err(efx, drv, efx->net_dev, "saw non-loopback RX packet in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -315,16 +321,16 @@ void efx_loopback_rx_packet(struct efx_nic *efx, } /* Check packet length */ - if (pkt_len != sizeof(*payload)) { + if (pkt_len != EFX_LOOPBACK_PAYLOAD_LEN) { netif_err(efx, drv, efx->net_dev, "saw incorrect RX packet length %d (wanted %d) in " - "%s loopback test\n", pkt_len, (int)sizeof(*payload), - LOOPBACK_MODE(efx)); + "%s loopback test\n", pkt_len, + (int)EFX_LOOPBACK_PAYLOAD_LEN, LOOPBACK_MODE(efx)); goto err; } /* Check that IP header matches */ - if (memcmp(&received->ip, &payload->ip, sizeof(payload->ip)) != 0) { + if (memcmp(&received.ip, &payload->ip, sizeof(payload->ip)) != 0) { netif_err(efx, drv, efx->net_dev, "saw corrupted IP header in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -332,7 +338,7 @@ void efx_loopback_rx_packet(struct efx_nic *efx, } /* Check that msg and padding matches */ - if (memcmp(&received->msg, &payload->msg, sizeof(received->msg)) != 0) { + if (memcmp(&received.msg, &payload->msg, sizeof(received.msg)) != 0) { netif_err(efx, drv, efx->net_dev, "saw corrupted RX packet in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -340,10 +346,10 @@ void efx_loopback_rx_packet(struct efx_nic *efx, } /* Check that iteration matches */ - if (received->iteration != payload->iteration) { + if (received.iteration != payload->iteration) { netif_err(efx, drv, efx->net_dev, "saw RX packet from iteration %d (wanted %d) in " - "%s loopback test\n", ntohs(received->iteration), + "%s loopback test\n", ntohs(received.iteration), ntohs(payload->iteration), LOOPBACK_MODE(efx)); goto err; } @@ -363,7 +369,8 @@ void efx_loopback_rx_packet(struct efx_nic *efx, buf_ptr, pkt_len, 0); netif_err(efx, drv, efx->net_dev, "expected packet:\n"); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1, - &state->payload, sizeof(state->payload), 0); + &state->payload.packet, EFX_LOOPBACK_PAYLOAD_LEN, + 0); } #endif atomic_inc(&state->rx_bad); @@ -385,14 +392,15 @@ static void efx_iterate_state(struct efx_nic *efx) payload->ip.daddr = htonl(INADDR_LOOPBACK); payload->ip.ihl = 5; payload->ip.check = (__force __sum16) htons(0xdead); - payload->ip.tot_len = htons(sizeof(*payload) - sizeof(struct ethhdr)); + payload->ip.tot_len = htons(sizeof(*payload) - + offsetof(struct efx_loopback_payload, ip)); payload->ip.version = IPVERSION; payload->ip.protocol = IPPROTO_UDP; /* Initialise udp header */ payload->udp.source = 0; - payload->udp.len = htons(sizeof(*payload) - sizeof(struct ethhdr) - - sizeof(struct iphdr)); + payload->udp.len = htons(sizeof(*payload) - + offsetof(struct efx_loopback_payload, udp)); payload->udp.check = 0; /* checksum ignored */ /* Fill out payload */ @@ -429,6 +437,10 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue) payload = skb_put(skb, sizeof(state->payload)); memcpy(payload, &state->payload, sizeof(state->payload)); payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2)); + /* Strip off the leading padding */ + skb_pull(skb, offsetof(struct efx_loopback_payload, header)); + /* Strip off the trailing padding */ + skb_trim(skb, EFX_LOOPBACK_PAYLOAD_LEN); /* Ensure everything we've written is visible to the * interrupt handler. */ diff --git a/drivers/net/ethernet/sfc/siena/selftest.c b/drivers/net/ethernet/sfc/siena/selftest.c index 07715a3d6bea..526da43d4b61 100644 --- a/drivers/net/ethernet/sfc/siena/selftest.c +++ b/drivers/net/ethernet/sfc/siena/selftest.c @@ -42,12 +42,17 @@ * Falcon only performs RSS on TCP/UDP packets. */ struct efx_loopback_payload { - struct ethhdr header; - struct iphdr ip; - struct udphdr udp; - __be16 iteration; - char msg[64]; -} __packed; + char pad[2]; /* Ensures ip is 4-byte aligned */ + struct_group_attr(packet, __packed, + struct ethhdr header; + struct iphdr ip; + struct udphdr udp; + __be16 iteration; + char msg[64]; + ); +} __packed __aligned(4); +#define EFX_LOOPBACK_PAYLOAD_LEN \ + sizeof_field(struct efx_loopback_payload, packet) /* Loopback test source MAC address */ static const u8 payload_source[ETH_ALEN] __aligned(2) = { @@ -282,7 +287,7 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, const char *buf_ptr, int pkt_len) { struct efx_loopback_state *state = efx->loopback_selftest; - struct efx_loopback_payload *received; + struct efx_loopback_payload received; struct efx_loopback_payload *payload; BUG_ON(!buf_ptr); @@ -293,13 +298,14 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, payload = &state->payload; - received = (struct efx_loopback_payload *) buf_ptr; - received->ip.saddr = payload->ip.saddr; + memcpy(&received.packet, buf_ptr, + min_t(int, pkt_len, EFX_LOOPBACK_PAYLOAD_LEN)); + received.ip.saddr = payload->ip.saddr; if (state->offload_csum) - received->ip.check = payload->ip.check; + received.ip.check = payload->ip.check; /* Check that header exists */ - if (pkt_len < sizeof(received->header)) { + if (pkt_len < sizeof(received.header)) { netif_err(efx, drv, efx->net_dev, "saw runt RX packet (length %d) in %s loopback " "test\n", pkt_len, LOOPBACK_MODE(efx)); @@ -307,7 +313,7 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, } /* Check that the ethernet header exists */ - if (memcmp(&received->header, &payload->header, ETH_HLEN) != 0) { + if (memcmp(&received.header, &payload->header, ETH_HLEN) != 0) { netif_err(efx, drv, efx->net_dev, "saw non-loopback RX packet in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -315,16 +321,16 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, } /* Check packet length */ - if (pkt_len != sizeof(*payload)) { + if (pkt_len != EFX_LOOPBACK_PAYLOAD_LEN) { netif_err(efx, drv, efx->net_dev, "saw incorrect RX packet length %d (wanted %d) in " - "%s loopback test\n", pkt_len, (int)sizeof(*payload), - LOOPBACK_MODE(efx)); + "%s loopback test\n", pkt_len, + (int)EFX_LOOPBACK_PAYLOAD_LEN, LOOPBACK_MODE(efx)); goto err; } /* Check that IP header matches */ - if (memcmp(&received->ip, &payload->ip, sizeof(payload->ip)) != 0) { + if (memcmp(&received.ip, &payload->ip, sizeof(payload->ip)) != 0) { netif_err(efx, drv, efx->net_dev, "saw corrupted IP header in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -332,7 +338,7 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, } /* Check that msg and padding matches */ - if (memcmp(&received->msg, &payload->msg, sizeof(received->msg)) != 0) { + if (memcmp(&received.msg, &payload->msg, sizeof(received.msg)) != 0) { netif_err(efx, drv, efx->net_dev, "saw corrupted RX packet in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -340,10 +346,10 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, } /* Check that iteration matches */ - if (received->iteration != payload->iteration) { + if (received.iteration != payload->iteration) { netif_err(efx, drv, efx->net_dev, "saw RX packet from iteration %d (wanted %d) in " - "%s loopback test\n", ntohs(received->iteration), + "%s loopback test\n", ntohs(received.iteration), ntohs(payload->iteration), LOOPBACK_MODE(efx)); goto err; } @@ -363,7 +369,8 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, buf_ptr, pkt_len, 0); netif_err(efx, drv, efx->net_dev, "expected packet:\n"); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1, - &state->payload, sizeof(state->payload), 0); + &state->payload.packet, EFX_LOOPBACK_PAYLOAD_LEN, + 0); } #endif atomic_inc(&state->rx_bad); @@ -385,14 +392,15 @@ static void efx_iterate_state(struct efx_nic *efx) payload->ip.daddr = htonl(INADDR_LOOPBACK); payload->ip.ihl = 5; payload->ip.check = (__force __sum16) htons(0xdead); - payload->ip.tot_len = htons(sizeof(*payload) - sizeof(struct ethhdr)); + payload->ip.tot_len = htons(sizeof(*payload) - + offsetof(struct efx_loopback_payload, ip)); payload->ip.version = IPVERSION; payload->ip.protocol = IPPROTO_UDP; /* Initialise udp header */ payload->udp.source = 0; - payload->udp.len = htons(sizeof(*payload) - sizeof(struct ethhdr) - - sizeof(struct iphdr)); + payload->udp.len = htons(sizeof(*payload) - + offsetof(struct efx_loopback_payload, udp)); payload->udp.check = 0; /* checksum ignored */ /* Fill out payload */ @@ -429,6 +437,10 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue) payload = skb_put(skb, sizeof(state->payload)); memcpy(payload, &state->payload, sizeof(state->payload)); payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2)); + /* Strip off the leading padding */ + skb_pull(skb, offsetof(struct efx_loopback_payload, header)); + /* Strip off the trailing padding */ + skb_trim(skb, EFX_LOOPBACK_PAYLOAD_LEN); /* Ensure everything we've written is visible to the * interrupt handler. */ diff --git a/drivers/net/ethernet/sfc/siena/tx_common.c b/drivers/net/ethernet/sfc/siena/tx_common.c index 93a32d61944f..a7a9ab304e13 100644 --- a/drivers/net/ethernet/sfc/siena/tx_common.c +++ b/drivers/net/ethernet/sfc/siena/tx_common.c @@ -12,6 +12,7 @@ #include "efx.h" #include "nic_common.h" #include "tx_common.h" +#include <net/gso.h> static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) { diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index c004443c1d58..fe268b6c1cac 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -14,11 +14,12 @@ #include <net/geneve.h> #include "tc.h" #include "tc_bindings.h" +#include "tc_encap_actions.h" #include "mae.h" #include "ef100_rep.h" #include "efx.h" -static enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev) +enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev) { if (netif_is_vxlan(net_dev)) return EFX_ENCAP_TYPE_VXLAN; @@ -33,8 +34,8 @@ static enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev) * May return NULL for the PF (us), or an error pointer for a device that * isn't supported as a TC offload endpoint */ -static struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, - struct net_device *dev) +struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, + struct net_device *dev) { struct efx_rep *efv; @@ -70,7 +71,7 @@ static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv } /* Convert a driver-internal vport ID into an external device (wire or VF) */ -static s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv) +s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv) { u32 mport; @@ -109,8 +110,17 @@ static void efx_tc_free_action_set(struct efx_nic *efx, */ list_del(&act->list); } - if (act->count) + if (act->count) { + spin_lock_bh(&act->count->cnt->lock); + if (!list_empty(&act->count_user)) + list_del(&act->count_user); + spin_unlock_bh(&act->count->cnt->lock); efx_tc_flower_put_counter_index(efx, act->count); + } + if (act->encap_md) { + list_del(&act->encap_user); + efx_tc_flower_release_encap_md(efx, act->encap_md); + } kfree(act); } @@ -132,23 +142,6 @@ static void efx_tc_free_action_set_list(struct efx_nic *efx, /* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */ } -static void efx_tc_flow_free(void *ptr, void *arg) -{ - struct efx_tc_flow_rule *rule = ptr; - struct efx_nic *efx = arg; - - netif_err(efx, drv, efx->net_dev, - "tc rule %lx still present at teardown, removing\n", - rule->cookie); - - efx_mae_delete_rule(efx, rule->fw_id); - - /* Release entries in subsidiary tables */ - efx_tc_free_action_set_list(efx, &rule->acts, true); - - kfree(rule); -} - /* Boilerplate for the simple 'copy a field' cases */ #define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field) \ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) { \ @@ -219,6 +212,7 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx, BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | BIT(FLOW_DISSECTOR_KEY_TCP) | @@ -363,20 +357,48 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx, return 0; } +static void efx_tc_flower_release_encap_match(struct efx_nic *efx, + struct efx_tc_encap_match *encap) +{ + int rc; + + if (!refcount_dec_and_test(&encap->ref)) + return; /* still in use */ + + if (encap->type == EFX_TC_EM_DIRECT) { + rc = efx_mae_unregister_encap_match(efx, encap); + if (rc) + /* Display message but carry on and remove entry from our + * SW tables, because there's not much we can do about it. + */ + netif_err(efx, drv, efx->net_dev, + "Failed to release encap match %#x, rc %d\n", + encap->fw_id, rc); + } + rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage, + efx_tc_encap_match_ht_params); + if (encap->pseudo) + efx_tc_flower_release_encap_match(efx, encap->pseudo); + kfree(encap); +} + static int efx_tc_flower_record_encap_match(struct efx_nic *efx, struct efx_tc_match *match, enum efx_encap_type type, + enum efx_tc_em_pseudo_type em_type, + u8 child_ip_tos_mask, + __be16 child_udp_sport_mask, struct netlink_ext_ack *extack) { - struct efx_tc_encap_match *encap, *old; + struct efx_tc_encap_match *encap, *old, *pseudo = NULL; bool ipv6 = false; int rc; /* We require that the socket-defining fields (IP addrs and UDP dest - * port) are present and exact-match. Other fields are currently not - * allowed. This meets what OVS will ask for, and means that we don't - * need to handle difficult checks for overlapping matches as could - * come up if we allowed masks or varying sets of match fields. + * port) are present and exact-match. Other fields may only be used + * if the field-set (and any masks) are the same for all encap + * matches on the same <sip,dip,dport> tuple; this is enforced by + * pseudo encap matches. */ if (match->mask.enc_dst_ip | match->mask.enc_src_ip) { if (!IS_ALL_ONES(match->mask.enc_dst_ip)) { @@ -414,29 +436,42 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port"); return -EOPNOTSUPP; } - if (match->mask.enc_sport) { - NL_SET_ERR_MSG_MOD(extack, "Egress encap match on src UDP port not supported"); - return -EOPNOTSUPP; - } - if (match->mask.enc_ip_tos) { - NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP ToS not supported"); - return -EOPNOTSUPP; + if (match->mask.enc_sport || match->mask.enc_ip_tos) { + struct efx_tc_match pmatch = *match; + + if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */ + NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler"); + return -EOPNOTSUPP; + } + pmatch.value.enc_ip_tos = 0; + pmatch.mask.enc_ip_tos = 0; + pmatch.value.enc_sport = 0; + pmatch.mask.enc_sport = 0; + rc = efx_tc_flower_record_encap_match(efx, &pmatch, type, + EFX_TC_EM_PSEUDO_MASK, + match->mask.enc_ip_tos, + match->mask.enc_sport, + extack); + if (rc) + return rc; + pseudo = pmatch.encap; } if (match->mask.enc_ip_ttl) { NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported"); - return -EOPNOTSUPP; + rc = -EOPNOTSUPP; + goto fail_pseudo; } - rc = efx_mae_check_encap_match_caps(efx, ipv6, extack); - if (rc) { - NL_SET_ERR_MSG_FMT_MOD(extack, "MAE hw reports no support for IPv%d encap matches", - ipv6 ? 6 : 4); - return -EOPNOTSUPP; - } + rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos, + match->mask.enc_sport, extack); + if (rc) + goto fail_pseudo; encap = kzalloc(sizeof(*encap), GFP_USER); - if (!encap) - return -ENOMEM; + if (!encap) { + rc = -ENOMEM; + goto fail_pseudo; + } encap->src_ip = match->value.enc_src_ip; encap->dst_ip = match->value.enc_dst_ip; #ifdef CONFIG_IPV6 @@ -445,12 +480,66 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, #endif encap->udp_dport = match->value.enc_dport; encap->tun_type = type; + encap->ip_tos = match->value.enc_ip_tos; + encap->ip_tos_mask = match->mask.enc_ip_tos; + encap->child_ip_tos_mask = child_ip_tos_mask; + encap->udp_sport = match->value.enc_sport; + encap->udp_sport_mask = match->mask.enc_sport; + encap->child_udp_sport_mask = child_udp_sport_mask; + encap->type = em_type; + encap->pseudo = pseudo; old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht, &encap->linkage, efx_tc_encap_match_ht_params); if (old) { /* don't need our new entry */ kfree(encap); + if (pseudo) /* don't need our new pseudo either */ + efx_tc_flower_release_encap_match(efx, pseudo); + /* check old and new em_types are compatible */ + switch (old->type) { + case EFX_TC_EM_DIRECT: + /* old EM is in hardware, so mustn't overlap with a + * pseudo, but may be shared with another direct EM + */ + if (em_type == EFX_TC_EM_DIRECT) + break; + NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry"); + return -EEXIST; + case EFX_TC_EM_PSEUDO_MASK: + /* old EM is protecting a ToS- or src port-qualified + * filter, so may only be shared with another pseudo + * for the same ToS and src port masks. + */ + if (em_type != EFX_TC_EM_PSEUDO_MASK) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "%s encap match conflicts with existing pseudo(MASK) entry", + em_type ? "Pseudo" : "Direct"); + return -EEXIST; + } + if (child_ip_tos_mask != old->child_ip_tos_mask) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Pseudo encap match for TOS mask %#04x conflicts with existing pseudo(MASK) entry for TOS mask %#04x", + child_ip_tos_mask, + old->child_ip_tos_mask); + return -EEXIST; + } + if (child_udp_sport_mask != old->child_udp_sport_mask) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Pseudo encap match for UDP src port mask %#x conflicts with existing pseudo(MASK) entry for mask %#x", + child_udp_sport_mask, + old->child_udp_sport_mask); + return -EEXIST; + } + break; + default: /* Unrecognised pseudo-type. Just say no */ + NL_SET_ERR_MSG_FMT_MOD(extack, + "%s encap match conflicts with existing pseudo(%d) entry", + em_type ? "Pseudo" : "Direct", + old->type); + return -EEXIST; + } + /* check old and new tun_types are compatible */ if (old->tun_type != type) { NL_SET_ERR_MSG_FMT_MOD(extack, "Egress encap match with conflicting tun_type %u != %u", @@ -462,10 +551,12 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, /* existing entry found */ encap = old; } else { - rc = efx_mae_register_encap_match(efx, encap); - if (rc) { - NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW"); - goto fail; + if (em_type == EFX_TC_EM_DIRECT) { + rc = efx_mae_register_encap_match(efx, encap); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW"); + goto fail; + } } refcount_set(&encap->ref, 1); } @@ -475,30 +566,12 @@ fail: rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage, efx_tc_encap_match_ht_params); kfree(encap); +fail_pseudo: + if (pseudo) + efx_tc_flower_release_encap_match(efx, pseudo); return rc; } -static void efx_tc_flower_release_encap_match(struct efx_nic *efx, - struct efx_tc_encap_match *encap) -{ - int rc; - - if (!refcount_dec_and_test(&encap->ref)) - return; /* still in use */ - - rc = efx_mae_unregister_encap_match(efx, encap); - if (rc) - /* Display message but carry on and remove entry from our - * SW tables, because there's not much we can do about it. - */ - netif_err(efx, drv, efx->net_dev, - "Failed to release encap match %#x, rc %d\n", - encap->fw_id, rc); - rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage, - efx_tc_encap_match_ht_params); - kfree(encap); -} - static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule) { efx_mae_delete_rule(efx, rule->fw_id); @@ -531,6 +604,7 @@ enum efx_tc_action_order { EFX_TC_AO_VLAN_POP, EFX_TC_AO_VLAN_PUSH, EFX_TC_AO_COUNT, + EFX_TC_AO_ENCAP, EFX_TC_AO_DELIVER }; /* Determine whether we can add @new action without violating order */ @@ -560,6 +634,10 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act, if (act->count) return false; fallthrough; + case EFX_TC_AO_ENCAP: + if (act->encap_md) + return false; + fallthrough; case EFX_TC_AO_DELIVER: return !act->deliver; default: @@ -650,6 +728,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, } rc = efx_tc_flower_record_encap_match(efx, &match, type, + EFX_TC_EM_DIRECT, 0, 0, extack); if (rc) return rc; @@ -722,6 +801,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, goto release; } act->count = ctr; + INIT_LIST_HEAD(&act->count_user); } if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) { @@ -854,11 +934,13 @@ static int efx_tc_flower_replace(struct efx_nic *efx, { struct flow_rule *fr = flow_cls_offload_flow_rule(tc); struct netlink_ext_ack *extack = tc->common.extack; + const struct ip_tunnel_info *encap_info = NULL; struct efx_tc_flow_rule *rule = NULL, *old; struct efx_tc_action_set *act = NULL; const struct flow_action_entry *fa; struct efx_rep *from_efv, *to_efv; struct efx_tc_match match; + u32 acts_id; s64 rc; int i; @@ -1007,6 +1089,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx, goto release; } act->count = ctr; + INIT_LIST_HEAD(&act->count_user); } switch (fa->id) { @@ -1023,6 +1106,59 @@ static int efx_tc_flower_replace(struct efx_nic *efx, case FLOW_ACTION_MIRRED: save = *act; + if (encap_info) { + struct efx_tc_encap_action *encap; + + if (!efx_tc_flower_action_order_ok(act, + EFX_TC_AO_ENCAP)) { + rc = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, "Encap action violates action order"); + goto release; + } + encap = efx_tc_flower_create_encap_md( + efx, encap_info, fa->dev, extack); + if (IS_ERR_OR_NULL(encap)) { + rc = PTR_ERR(encap); + if (!rc) + rc = -EIO; /* arbitrary */ + goto release; + } + act->encap_md = encap; + list_add_tail(&act->encap_user, &encap->users); + act->dest_mport = encap->dest_mport; + act->deliver = 1; + if (act->count && !WARN_ON(!act->count->cnt)) { + /* This counter is used by an encap + * action, which needs a reference back + * so it can prod neighbouring whenever + * traffic is seen. + */ + spin_lock_bh(&act->count->cnt->lock); + list_add_tail(&act->count_user, + &act->count->cnt->users); + spin_unlock_bh(&act->count->cnt->lock); + } + rc = efx_mae_alloc_action_set(efx, act); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)"); + goto release; + } + list_add_tail(&act->list, &rule->acts.list); + act->user = &rule->acts; + act = NULL; + if (fa->id == FLOW_ACTION_REDIRECT) + break; /* end of the line */ + /* Mirror, so continue on with saved act */ + save.count = NULL; + act = kzalloc(sizeof(*act), GFP_USER); + if (!act) { + rc = -ENOMEM; + goto release; + } + *act = save; + break; + } + if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) { /* can't happen */ rc = -EOPNOTSUPP; @@ -1086,6 +1222,37 @@ static int efx_tc_flower_replace(struct efx_nic *efx, act->vlan_proto[act->vlan_push] = fa->vlan.proto; act->vlan_push++; break; + case FLOW_ACTION_TUNNEL_ENCAP: + if (encap_info) { + /* Can't specify encap multiple times. + * If you want to overwrite an existing + * encap_info, use an intervening + * FLOW_ACTION_TUNNEL_DECAP to clear it. + */ + NL_SET_ERR_MSG_MOD(extack, "Tunnel key set when already set"); + rc = -EINVAL; + goto release; + } + if (!fa->tunnel) { + NL_SET_ERR_MSG_MOD(extack, "Tunnel key set is missing key"); + rc = -EOPNOTSUPP; + goto release; + } + encap_info = fa->tunnel; + break; + case FLOW_ACTION_TUNNEL_DECAP: + if (encap_info) { + encap_info = NULL; + break; + } + /* Since we don't support enc_key matches on ingress + * (and if we did there'd be no tunnel-device to give + * us a type), we can't offload a decap that's not + * just undoing a previous encap action. + */ + NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device"); + rc = -EOPNOTSUPP; + goto release; default: NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u", fa->id); @@ -1129,8 +1296,21 @@ static int efx_tc_flower_replace(struct efx_nic *efx, NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw"); goto release; } + if (from_efv == EFX_EFV_PF) + /* PF netdev, so rule applies to traffic from wire */ + rule->fallback = &efx->tc->facts.pf; + else + /* repdev, so rule applies to traffic from representee */ + rule->fallback = &efx->tc->facts.reps; + if (!efx_tc_check_ready(efx, rule)) { + netif_dbg(efx, drv, efx->net_dev, "action not ready for hw\n"); + acts_id = rule->fallback->fw_id; + } else { + netif_dbg(efx, drv, efx->net_dev, "ready for hw\n"); + acts_id = rule->acts.fw_id; + } rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC, - rule->acts.fw_id, &rule->fw_id); + acts_id, &rule->fw_id); if (rc) { NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw"); goto release_acts; @@ -1327,6 +1507,58 @@ void efx_tc_deconfigure_default_rule(struct efx_nic *efx, rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL; } +static int efx_tc_configure_fallback_acts(struct efx_nic *efx, u32 eg_port, + struct efx_tc_action_set_list *acts) +{ + struct efx_tc_action_set *act; + int rc; + + act = kzalloc(sizeof(*act), GFP_KERNEL); + if (!act) + return -ENOMEM; + act->deliver = 1; + act->dest_mport = eg_port; + rc = efx_mae_alloc_action_set(efx, act); + if (rc) + goto fail1; + EFX_WARN_ON_PARANOID(!list_empty(&acts->list)); + list_add_tail(&act->list, &acts->list); + rc = efx_mae_alloc_action_set_list(efx, acts); + if (rc) + goto fail2; + return 0; +fail2: + list_del(&act->list); + efx_mae_free_action_set(efx, act->fw_id); +fail1: + kfree(act); + return rc; +} + +static int efx_tc_configure_fallback_acts_pf(struct efx_nic *efx) +{ + struct efx_tc_action_set_list *acts = &efx->tc->facts.pf; + u32 eg_port; + + efx_mae_mport_uplink(efx, &eg_port); + return efx_tc_configure_fallback_acts(efx, eg_port, acts); +} + +static int efx_tc_configure_fallback_acts_reps(struct efx_nic *efx) +{ + struct efx_tc_action_set_list *acts = &efx->tc->facts.reps; + u32 eg_port; + + efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port); + return efx_tc_configure_fallback_acts(efx, eg_port, acts); +} + +static void efx_tc_deconfigure_fallback_acts(struct efx_nic *efx, + struct efx_tc_action_set_list *acts) +{ + efx_tc_free_action_set_list(efx, acts, true); +} + static int efx_tc_configure_rep_mport(struct efx_nic *efx) { u32 rep_mport_label; @@ -1419,10 +1651,16 @@ int efx_init_tc(struct efx_nic *efx) rc = efx_tc_configure_rep_mport(efx); if (rc) return rc; - efx->tc->up = true; + rc = efx_tc_configure_fallback_acts_pf(efx); + if (rc) + return rc; + rc = efx_tc_configure_fallback_acts_reps(efx); + if (rc) + return rc; rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx); if (rc) return rc; + efx->tc->up = true; return 0; } @@ -1436,6 +1674,8 @@ void efx_fini_tc(struct efx_nic *efx) efx_tc_deconfigure_rep_mport(efx); efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf); efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire); + efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.pf); + efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.reps); efx->tc->up = false; } @@ -1451,6 +1691,21 @@ static void efx_tc_encap_match_free(void *ptr, void *__unused) kfree(encap); } +static void efx_tc_flow_free(void *ptr, void *arg) +{ + struct efx_tc_flow_rule *rule = ptr; + struct efx_nic *efx = arg; + + netif_err(efx, drv, efx->net_dev, + "tc rule %lx still present at teardown, removing\n", + rule->cookie); + + /* Also releases entries in subsidiary tables */ + efx_tc_delete_rule(efx, rule); + + kfree(rule); +} + int efx_init_struct_tc(struct efx_nic *efx) { int rc; @@ -1470,6 +1725,9 @@ int efx_init_struct_tc(struct efx_nic *efx) mutex_init(&efx->tc->mutex); init_waitqueue_head(&efx->tc->flush_wq); + rc = efx_tc_init_encap_actions(efx); + if (rc < 0) + goto fail_encap_actions; rc = efx_tc_init_counters(efx); if (rc < 0) goto fail_counters; @@ -1485,6 +1743,10 @@ int efx_init_struct_tc(struct efx_nic *efx) efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL; INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list); efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL; + INIT_LIST_HEAD(&efx->tc->facts.pf.list); + efx->tc->facts.pf.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL; + INIT_LIST_HEAD(&efx->tc->facts.reps.list); + efx->tc->facts.reps.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL; efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type; return 0; fail_match_action_ht: @@ -1492,6 +1754,8 @@ fail_match_action_ht: fail_encap_match_ht: efx_tc_destroy_counters(efx); fail_counters: + efx_tc_destroy_encap_actions(efx); +fail_encap_actions: mutex_destroy(&efx->tc->mutex); kfree(efx->tc->caps); fail_alloc_caps: @@ -1510,11 +1774,16 @@ void efx_fini_struct_tc(struct efx_nic *efx) MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL); EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id != MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL); + EFX_WARN_ON_PARANOID(efx->tc->facts.pf.fw_id != + MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL); + EFX_WARN_ON_PARANOID(efx->tc->facts.reps.fw_id != + MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL); rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free, efx); rhashtable_free_and_destroy(&efx->tc->encap_match_ht, efx_tc_encap_match_free, NULL); efx_tc_fini_counters(efx); + efx_tc_fini_encap_actions(efx); mutex_unlock(&efx->tc->mutex); mutex_destroy(&efx->tc->mutex); kfree(efx->tc->caps); diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h index 04cced6a2d39..1549c3df43bb 100644 --- a/drivers/net/ethernet/sfc/tc.h +++ b/drivers/net/ethernet/sfc/tc.h @@ -25,6 +25,8 @@ static inline bool efx_ipv6_addr_all_ones(struct in6_addr *addr) } #endif +struct efx_tc_encap_action; /* see tc_encap_actions.h */ + struct efx_tc_action_set { u16 vlan_push:2; u16 vlan_pop:2; @@ -33,6 +35,10 @@ struct efx_tc_action_set { __be16 vlan_tci[2]; /* TCIs for vlan_push */ __be16 vlan_proto[2]; /* Ethertypes for vlan_push */ struct efx_tc_counter_index *count; + struct efx_tc_encap_action *encap_md; /* entry in tc_encap_ht table */ + struct list_head encap_user; /* entry on encap_md->users list */ + struct efx_tc_action_set_list *user; /* Only populated if encap_md */ + struct list_head count_user; /* entry on counter->users list, if encap */ u32 dest_mport; u32 fw_id; /* index of this entry in firmware actions table */ struct list_head list; @@ -74,14 +80,41 @@ static inline bool efx_tc_match_is_encap(const struct efx_tc_match_fields *mask) mask->enc_ip_ttl || mask->enc_sport || mask->enc_dport; } +/** + * enum efx_tc_em_pseudo_type - &struct efx_tc_encap_match pseudo type + * + * These are used to classify "pseudo" encap matches, which don't refer + * to an entry in hardware but rather indicate that a section of the + * match space is in use by another Outer Rule. + * + * @EFX_TC_EM_DIRECT: real HW entry in Outer Rule table; not a pseudo. + * Hardware index in &struct efx_tc_encap_match.fw_id is valid. + * @EFX_TC_EM_PSEUDO_MASK: registered by an encap match which includes a + * match on an optional field (currently ip_tos and/or udp_sport), + * to prevent an overlapping encap match _without_ optional fields. + * The pseudo encap match may be referenced again by an encap match + * with different values for these fields, but all masks must match the + * first (stored in our child_* fields). + */ +enum efx_tc_em_pseudo_type { + EFX_TC_EM_DIRECT, + EFX_TC_EM_PSEUDO_MASK, +}; + struct efx_tc_encap_match { __be32 src_ip, dst_ip; struct in6_addr src_ip6, dst_ip6; __be16 udp_dport; + __be16 udp_sport, udp_sport_mask; + u8 ip_tos, ip_tos_mask; struct rhash_head linkage; enum efx_encap_type tun_type; + u8 child_ip_tos_mask; + __be16 child_udp_sport_mask; refcount_t ref; + enum efx_tc_em_pseudo_type type; u32 fw_id; /* index of this entry in firmware encap match table */ + struct efx_tc_encap_match *pseudo; /* Referenced pseudo EM if needed */ }; struct efx_tc_match { @@ -100,6 +133,7 @@ struct efx_tc_flow_rule { struct rhash_head linkage; struct efx_tc_match match; struct efx_tc_action_set_list acts; + struct efx_tc_action_set_list *fallback; /* what to use when unready? */ u32 fw_id; }; @@ -117,8 +151,10 @@ enum efx_tc_rule_prios { * @mutex: Used to serialise operations on TC hashtables * @counter_ht: Hashtable of TC counters (FW IDs and counter values) * @counter_id_ht: Hashtable mapping TC counter cookies to counters + * @encap_ht: Hashtable of TC encap actions * @encap_match_ht: Hashtable of TC encap matches * @match_action_ht: Hashtable of TC match-action rules + * @neigh_ht: Hashtable of neighbour watches (&struct efx_neigh_binder) * @reps_mport_id: MAE port allocated for representor RX * @reps_filter_uc: VNIC filter for representor unicast RX (promisc) * @reps_filter_mc: VNIC filter for representor multicast RX (allmulti) @@ -133,6 +169,11 @@ enum efx_tc_rule_prios { * %EFX_TC_PRIO_DFLT. Named by *ingress* port * @dflt.pf: rule for traffic ingressing from PF (egresses to wire) * @dflt.wire: rule for traffic ingressing from wire (egresses to PF) + * @facts: Fallback action-set-lists for unready rules. Named by *egress* port + * @facts.pf: action-set-list for unready rules on PF netdev, hence applying to + * traffic from wire, and egressing to PF + * @facts.reps: action-set-list for unready rules on representors, hence + * applying to traffic from representees, and egressing to the reps mport * @up: have TC datastructures been set up? */ struct efx_tc_state { @@ -141,8 +182,10 @@ struct efx_tc_state { struct mutex mutex; struct rhashtable counter_ht; struct rhashtable counter_id_ht; + struct rhashtable encap_ht; struct rhashtable encap_match_ht; struct rhashtable match_action_ht; + struct rhashtable neigh_ht; u32 reps_mport_id, reps_mport_vport_id; s32 reps_filter_uc, reps_filter_mc; bool flush_counters; @@ -153,11 +196,19 @@ struct efx_tc_state { struct efx_tc_flow_rule pf; struct efx_tc_flow_rule wire; } dflt; + struct { + struct efx_tc_action_set_list pf; + struct efx_tc_action_set_list reps; + } facts; bool up; }; struct efx_rep; +enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev); +struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, + struct net_device *dev); +s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv); int efx_tc_configure_default_rule_rep(struct efx_rep *efv); void efx_tc_deconfigure_default_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule); diff --git a/drivers/net/ethernet/sfc/tc_bindings.c b/drivers/net/ethernet/sfc/tc_bindings.c index c18d64519c2d..1b79c535c54e 100644 --- a/drivers/net/ethernet/sfc/tc_bindings.c +++ b/drivers/net/ethernet/sfc/tc_bindings.c @@ -10,6 +10,7 @@ #include "tc_bindings.h" #include "tc.h" +#include "tc_encap_actions.h" struct efx_tc_block_binding { struct list_head list; @@ -226,3 +227,15 @@ int efx_tc_setup(struct net_device *net_dev, enum tc_setup_type type, return -EOPNOTSUPP; } + +int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event, + struct net_device *net_dev) +{ + if (efx->type->is_vf) + return NOTIFY_DONE; + + if (event == NETDEV_UNREGISTER) + efx_tc_unregister_egdev(efx, net_dev); + + return NOTIFY_OK; +} diff --git a/drivers/net/ethernet/sfc/tc_bindings.h b/drivers/net/ethernet/sfc/tc_bindings.h index c210bb09150e..a326d23d322b 100644 --- a/drivers/net/ethernet/sfc/tc_bindings.h +++ b/drivers/net/ethernet/sfc/tc_bindings.h @@ -12,6 +12,7 @@ #define EFX_TC_BINDINGS_H #include "net_driver.h" +#if IS_ENABLED(CONFIG_SFC_SRIOV) #include <net/sch_generic.h> struct efx_rep; @@ -26,4 +27,17 @@ int efx_tc_indr_setup_cb(struct net_device *net_dev, struct Qdisc *sch, void *cb_priv, enum tc_setup_type type, void *type_data, void *data, void (*cleanup)(struct flow_block_cb *block_cb)); +int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event, + struct net_device *net_dev); + +#else /* CONFIG_SFC_SRIOV */ + +static inline int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event, + struct net_device *net_dev) +{ + return NOTIFY_DONE; +} + +#endif /* CONFIG_SFC_SRIOV */ + #endif /* EFX_TC_BINDINGS_H */ diff --git a/drivers/net/ethernet/sfc/tc_counters.c b/drivers/net/ethernet/sfc/tc_counters.c index d1a91d54c6bb..979f49058a0c 100644 --- a/drivers/net/ethernet/sfc/tc_counters.c +++ b/drivers/net/ethernet/sfc/tc_counters.c @@ -9,6 +9,7 @@ */ #include "tc_counters.h" +#include "tc_encap_actions.h" #include "mae_counter_format.h" #include "mae.h" #include "rx_common.h" @@ -31,6 +32,15 @@ static void efx_tc_counter_free(void *ptr, void *__unused) { struct efx_tc_counter *cnt = ptr; + WARN_ON(!list_empty(&cnt->users)); + /* We'd like to synchronize_rcu() here, but unfortunately we aren't + * removing the element from the hashtable (it's not clear that's a + * safe thing to do in an rhashtable_free_and_destroy free_fn), so + * threads could still be obtaining new pointers to *cnt if they can + * race against this function at all. + */ + flush_work(&cnt->work); + EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock)); kfree(cnt); } @@ -74,6 +84,49 @@ void efx_tc_fini_counters(struct efx_nic *efx) rhashtable_free_and_destroy(&efx->tc->counter_ht, efx_tc_counter_free, NULL); } +static void efx_tc_counter_work(struct work_struct *work) +{ + struct efx_tc_counter *cnt = container_of(work, struct efx_tc_counter, work); + struct efx_tc_encap_action *encap; + struct efx_tc_action_set *act; + unsigned long touched; + struct neighbour *n; + + spin_lock_bh(&cnt->lock); + touched = READ_ONCE(cnt->touched); + + list_for_each_entry(act, &cnt->users, count_user) { + encap = act->encap_md; + if (!encap) + continue; + if (!encap->neigh) /* can't happen */ + continue; + if (time_after_eq(encap->neigh->used, touched)) + continue; + encap->neigh->used = touched; + /* We have passed traffic using this ARP entry, so + * indicate to the ARP cache that it's still active + */ + if (encap->neigh->dst_ip) + n = neigh_lookup(&arp_tbl, &encap->neigh->dst_ip, + encap->neigh->egdev); + else +#if IS_ENABLED(CONFIG_IPV6) + n = neigh_lookup(ipv6_stub->nd_tbl, + &encap->neigh->dst_ip6, + encap->neigh->egdev); +#else + n = NULL; +#endif + if (!n) + continue; + + neigh_event_send(n, NULL); + neigh_release(n); + } + spin_unlock_bh(&cnt->lock); +} + /* Counter allocation */ static struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx, @@ -87,12 +140,14 @@ static struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx return ERR_PTR(-ENOMEM); spin_lock_init(&cnt->lock); + INIT_WORK(&cnt->work, efx_tc_counter_work); cnt->touched = jiffies; cnt->type = type; rc = efx_mae_allocate_counter(efx, cnt); if (rc) goto fail1; + INIT_LIST_HEAD(&cnt->users); rc = rhashtable_insert_fast(&efx->tc->counter_ht, &cnt->linkage, efx_tc_counter_ht_params); if (rc) @@ -126,6 +181,7 @@ static void efx_tc_flower_release_counter(struct efx_nic *efx, netif_warn(efx, hw, efx->net_dev, "Failed to free MAE counter %u, rc %d\n", cnt->fw_id, rc); + WARN_ON(!list_empty(&cnt->users)); /* This doesn't protect counter updates coming in arbitrarily long * after we deleted the counter. The RCU just ensures that we won't * free the counter while another thread has a pointer to it. @@ -133,6 +189,7 @@ static void efx_tc_flower_release_counter(struct efx_nic *efx, * is handled by the generation count. */ synchronize_rcu(); + flush_work(&cnt->work); EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock)); kfree(cnt); } @@ -302,6 +359,7 @@ static void efx_tc_counter_update(struct efx_nic *efx, cnt->touched = jiffies; } spin_unlock_bh(&cnt->lock); + schedule_work(&cnt->work); out: rcu_read_unlock(); } diff --git a/drivers/net/ethernet/sfc/tc_counters.h b/drivers/net/ethernet/sfc/tc_counters.h index 8fc7c4bbb29c..41e57f34b763 100644 --- a/drivers/net/ethernet/sfc/tc_counters.h +++ b/drivers/net/ethernet/sfc/tc_counters.h @@ -32,6 +32,9 @@ struct efx_tc_counter { u64 old_packets, old_bytes; /* Values last time passed to userspace */ /* jiffies of the last time we saw packets increase */ unsigned long touched; + struct work_struct work; /* For notifying encap actions */ + /* owners of corresponding count actions */ + struct list_head users; }; struct efx_tc_counter_index { diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c new file mode 100644 index 000000000000..7e8bcdb222ad --- /dev/null +++ b/drivers/net/ethernet/sfc/tc_encap_actions.c @@ -0,0 +1,747 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2023, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "tc_encap_actions.h" +#include "tc.h" +#include "mae.h" +#include <net/vxlan.h> +#include <net/geneve.h> +#include <net/netevent.h> +#include <net/arp.h> + +static const struct rhashtable_params efx_neigh_ht_params = { + .key_len = offsetof(struct efx_neigh_binder, ha), + .key_offset = 0, + .head_offset = offsetof(struct efx_neigh_binder, linkage), +}; + +static const struct rhashtable_params efx_tc_encap_ht_params = { + .key_len = offsetofend(struct efx_tc_encap_action, key), + .key_offset = 0, + .head_offset = offsetof(struct efx_tc_encap_action, linkage), +}; + +static void efx_tc_encap_free(void *ptr, void *__unused) +{ + struct efx_tc_encap_action *enc = ptr; + + WARN_ON(refcount_read(&enc->ref)); + kfree(enc); +} + +static void efx_neigh_free(void *ptr, void *__unused) +{ + struct efx_neigh_binder *neigh = ptr; + + WARN_ON(refcount_read(&neigh->ref)); + WARN_ON(!list_empty(&neigh->users)); + put_net_track(neigh->net, &neigh->ns_tracker); + netdev_put(neigh->egdev, &neigh->dev_tracker); + kfree(neigh); +} + +int efx_tc_init_encap_actions(struct efx_nic *efx) +{ + int rc; + + rc = rhashtable_init(&efx->tc->neigh_ht, &efx_neigh_ht_params); + if (rc < 0) + goto fail_neigh_ht; + rc = rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params); + if (rc < 0) + goto fail_encap_ht; + return 0; +fail_encap_ht: + rhashtable_destroy(&efx->tc->neigh_ht); +fail_neigh_ht: + return rc; +} + +/* Only call this in init failure teardown. + * Normal exit should fini instead as there may be entries in the table. + */ +void efx_tc_destroy_encap_actions(struct efx_nic *efx) +{ + rhashtable_destroy(&efx->tc->encap_ht); + rhashtable_destroy(&efx->tc->neigh_ht); +} + +void efx_tc_fini_encap_actions(struct efx_nic *efx) +{ + rhashtable_free_and_destroy(&efx->tc->encap_ht, efx_tc_encap_free, NULL); + rhashtable_free_and_destroy(&efx->tc->neigh_ht, efx_neigh_free, NULL); +} + +static void efx_neigh_update(struct work_struct *work); + +static int efx_bind_neigh(struct efx_nic *efx, + struct efx_tc_encap_action *encap, struct net *net, + struct netlink_ext_ack *extack) +{ + struct efx_neigh_binder *neigh, *old; + struct flowi6 flow6 = {}; + struct flowi4 flow4 = {}; + int rc; + + /* GCC stupidly thinks that only values explicitly listed in the enum + * definition can _possibly_ be sensible case values, so without this + * cast it complains about the IPv6 versions. + */ + switch ((int)encap->type) { + case EFX_ENCAP_TYPE_VXLAN: + case EFX_ENCAP_TYPE_GENEVE: + flow4.flowi4_proto = IPPROTO_UDP; + flow4.fl4_dport = encap->key.tp_dst; + flow4.flowi4_tos = encap->key.tos; + flow4.daddr = encap->key.u.ipv4.dst; + flow4.saddr = encap->key.u.ipv4.src; + break; + case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6: + case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6: + flow6.flowi6_proto = IPPROTO_UDP; + flow6.fl6_dport = encap->key.tp_dst; + flow6.flowlabel = ip6_make_flowinfo(encap->key.tos, + encap->key.label); + flow6.daddr = encap->key.u.ipv6.dst; + flow6.saddr = encap->key.u.ipv6.src; + break; + default: + NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d", + (int)encap->type); + return -EOPNOTSUPP; + } + + neigh = kzalloc(sizeof(*neigh), GFP_KERNEL_ACCOUNT); + if (!neigh) + return -ENOMEM; + neigh->net = get_net_track(net, &neigh->ns_tracker, GFP_KERNEL_ACCOUNT); + neigh->dst_ip = flow4.daddr; + neigh->dst_ip6 = flow6.daddr; + + old = rhashtable_lookup_get_insert_fast(&efx->tc->neigh_ht, + &neigh->linkage, + efx_neigh_ht_params); + if (old) { + /* don't need our new entry */ + put_net_track(neigh->net, &neigh->ns_tracker); + kfree(neigh); + if (!refcount_inc_not_zero(&old->ref)) + return -EAGAIN; + /* existing entry found, ref taken */ + neigh = old; + } else { + /* New entry. We need to initiate a lookup */ + struct neighbour *n; + struct rtable *rt; + + if (encap->type & EFX_ENCAP_FLAG_IPV6) { +#if IS_ENABLED(CONFIG_IPV6) + struct dst_entry *dst; + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6, + NULL); + rc = PTR_ERR_OR_ZERO(dst); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap"); + goto out_free; + } + neigh->egdev = dst->dev; + netdev_hold(neigh->egdev, &neigh->dev_tracker, + GFP_KERNEL_ACCOUNT); + neigh->ttl = ip6_dst_hoplimit(dst); + n = dst_neigh_lookup(dst, &flow6.daddr); + dst_release(dst); +#else + /* We shouldn't ever get here, because if IPv6 isn't + * enabled how did someone create an IPv6 tunnel_key? + */ + rc = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)"); + goto out_free; +#endif + } else { + rt = ip_route_output_key(net, &flow4); + if (IS_ERR_OR_NULL(rt)) { + rc = PTR_ERR_OR_ZERO(rt); + if (!rc) + rc = -EIO; + NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap"); + goto out_free; + } + neigh->egdev = rt->dst.dev; + netdev_hold(neigh->egdev, &neigh->dev_tracker, + GFP_KERNEL_ACCOUNT); + neigh->ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &flow4.daddr); + ip_rt_put(rt); + } + if (!n) { + rc = -ENETUNREACH; + NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap"); + netdev_put(neigh->egdev, &neigh->dev_tracker); + goto out_free; + } + refcount_set(&neigh->ref, 1); + INIT_LIST_HEAD(&neigh->users); + read_lock_bh(&n->lock); + ether_addr_copy(neigh->ha, n->ha); + neigh->n_valid = n->nud_state & NUD_VALID; + read_unlock_bh(&n->lock); + rwlock_init(&neigh->lock); + INIT_WORK(&neigh->work, efx_neigh_update); + neigh->efx = efx; + neigh->used = jiffies; + if (!neigh->n_valid) + /* Prod ARP to find us a neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } + /* Add us to this neigh */ + encap->neigh = neigh; + list_add_tail(&encap->list, &neigh->users); + return 0; + +out_free: + /* cleanup common to several error paths */ + rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage, + efx_neigh_ht_params); + synchronize_rcu(); + put_net_track(net, &neigh->ns_tracker); + kfree(neigh); + return rc; +} + +static void efx_free_neigh(struct efx_neigh_binder *neigh) +{ + struct efx_nic *efx = neigh->efx; + + rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage, + efx_neigh_ht_params); + synchronize_rcu(); + netdev_put(neigh->egdev, &neigh->dev_tracker); + put_net_track(neigh->net, &neigh->ns_tracker); + kfree(neigh); +} + +static void efx_release_neigh(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + struct efx_neigh_binder *neigh = encap->neigh; + + if (!neigh) + return; + list_del(&encap->list); + encap->neigh = NULL; + if (!refcount_dec_and_test(&neigh->ref)) + return; /* still in use */ + efx_free_neigh(neigh); +} + +static void efx_gen_tun_header_eth(struct efx_tc_encap_action *encap, u16 proto) +{ + struct efx_neigh_binder *neigh = encap->neigh; + struct ethhdr *eth; + + encap->encap_hdr_len = sizeof(*eth); + eth = (struct ethhdr *)encap->encap_hdr; + + if (encap->neigh->n_valid) + ether_addr_copy(eth->h_dest, neigh->ha); + else + eth_zero_addr(eth->h_dest); + ether_addr_copy(eth->h_source, neigh->egdev->dev_addr); + eth->h_proto = htons(proto); +} + +static void efx_gen_tun_header_ipv4(struct efx_tc_encap_action *encap, u8 ipproto, u8 len) +{ + struct efx_neigh_binder *neigh = encap->neigh; + struct ip_tunnel_key *key = &encap->key; + struct iphdr *ip; + + ip = (struct iphdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*ip); + + ip->daddr = key->u.ipv4.dst; + ip->saddr = key->u.ipv4.src; + ip->ttl = neigh->ttl; + ip->protocol = ipproto; + ip->version = 0x4; + ip->ihl = 0x5; + ip->tot_len = cpu_to_be16(ip->ihl * 4 + len); + ip_send_check(ip); +} + +#ifdef CONFIG_IPV6 +static void efx_gen_tun_header_ipv6(struct efx_tc_encap_action *encap, u8 ipproto, u8 len) +{ + struct efx_neigh_binder *neigh = encap->neigh; + struct ip_tunnel_key *key = &encap->key; + struct ipv6hdr *ip; + + ip = (struct ipv6hdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*ip); + + ip6_flow_hdr(ip, key->tos, key->label); + ip->daddr = key->u.ipv6.dst; + ip->saddr = key->u.ipv6.src; + ip->hop_limit = neigh->ttl; + ip->nexthdr = ipproto; + ip->version = 0x6; + ip->payload_len = cpu_to_be16(len); +} +#endif + +static void efx_gen_tun_header_udp(struct efx_tc_encap_action *encap, u8 len) +{ + struct ip_tunnel_key *key = &encap->key; + struct udphdr *udp; + + udp = (struct udphdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*udp); + + udp->dest = key->tp_dst; + udp->len = cpu_to_be16(sizeof(*udp) + len); +} + +static void efx_gen_tun_header_vxlan(struct efx_tc_encap_action *encap) +{ + struct ip_tunnel_key *key = &encap->key; + struct vxlanhdr *vxlan; + + vxlan = (struct vxlanhdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*vxlan); + + vxlan->vx_flags = VXLAN_HF_VNI; + vxlan->vx_vni = vxlan_vni_field(tunnel_id_to_key32(key->tun_id)); +} + +static void efx_gen_tun_header_geneve(struct efx_tc_encap_action *encap) +{ + struct ip_tunnel_key *key = &encap->key; + struct genevehdr *geneve; + u32 vni; + + geneve = (struct genevehdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*geneve); + + geneve->proto_type = htons(ETH_P_TEB); + /* convert tun_id to host-endian so we can use host arithmetic to + * extract individual bytes. + */ + vni = ntohl(tunnel_id_to_key32(key->tun_id)); + geneve->vni[0] = vni >> 16; + geneve->vni[1] = vni >> 8; + geneve->vni[2] = vni; +} + +#define vxlan_header_l4_len (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) +#define vxlan4_header_len (sizeof(struct ethhdr) + sizeof(struct iphdr) + vxlan_header_l4_len) +static void efx_gen_vxlan_header_ipv4(struct efx_tc_encap_action *encap) +{ + BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan4_header_len); + efx_gen_tun_header_eth(encap, ETH_P_IP); + efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, vxlan_header_l4_len); + efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr)); + efx_gen_tun_header_vxlan(encap); +} + +#define geneve_header_l4_len (sizeof(struct udphdr) + sizeof(struct genevehdr)) +#define geneve4_header_len (sizeof(struct ethhdr) + sizeof(struct iphdr) + geneve_header_l4_len) +static void efx_gen_geneve_header_ipv4(struct efx_tc_encap_action *encap) +{ + BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve4_header_len); + efx_gen_tun_header_eth(encap, ETH_P_IP); + efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, geneve_header_l4_len); + efx_gen_tun_header_udp(encap, sizeof(struct genevehdr)); + efx_gen_tun_header_geneve(encap); +} + +#ifdef CONFIG_IPV6 +#define vxlan6_header_len (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + vxlan_header_l4_len) +static void efx_gen_vxlan_header_ipv6(struct efx_tc_encap_action *encap) +{ + BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan6_header_len); + efx_gen_tun_header_eth(encap, ETH_P_IPV6); + efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, vxlan_header_l4_len); + efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr)); + efx_gen_tun_header_vxlan(encap); +} + +#define geneve6_header_len (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + geneve_header_l4_len) +static void efx_gen_geneve_header_ipv6(struct efx_tc_encap_action *encap) +{ + BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve6_header_len); + efx_gen_tun_header_eth(encap, ETH_P_IPV6); + efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, geneve_header_l4_len); + efx_gen_tun_header_udp(encap, sizeof(struct genevehdr)); + efx_gen_tun_header_geneve(encap); +} +#endif + +static void efx_gen_encap_header(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + encap->n_valid = encap->neigh->n_valid; + + /* GCC stupidly thinks that only values explicitly listed in the enum + * definition can _possibly_ be sensible case values, so without this + * cast it complains about the IPv6 versions. + */ + switch ((int)encap->type) { + case EFX_ENCAP_TYPE_VXLAN: + efx_gen_vxlan_header_ipv4(encap); + break; + case EFX_ENCAP_TYPE_GENEVE: + efx_gen_geneve_header_ipv4(encap); + break; +#ifdef CONFIG_IPV6 + case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6: + efx_gen_vxlan_header_ipv6(encap); + break; + case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6: + efx_gen_geneve_header_ipv6(encap); + break; +#endif + default: + /* unhandled encap type, can't happen */ + if (net_ratelimit()) + netif_err(efx, drv, efx->net_dev, + "Bogus encap type %d, can't generate\n", + encap->type); + + /* Use fallback action. */ + encap->n_valid = false; + break; + } +} + +static void efx_tc_update_encap(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + struct efx_tc_action_set_list *acts, *fallback; + struct efx_tc_flow_rule *rule; + struct efx_tc_action_set *act; + int rc; + + if (encap->n_valid) { + /* Make sure no rules are using this encap while we change it */ + list_for_each_entry(act, &encap->users, encap_user) { + acts = act->user; + if (WARN_ON(!acts)) /* can't happen */ + continue; + rule = container_of(acts, struct efx_tc_flow_rule, acts); + if (rule->fallback) + fallback = rule->fallback; + else /* fallback fallback: deliver to PF */ + fallback = &efx->tc->facts.pf; + rc = efx_mae_update_rule(efx, fallback->fw_id, + rule->fw_id); + if (rc) + netif_err(efx, drv, efx->net_dev, + "Failed to update (f) rule %08x rc %d\n", + rule->fw_id, rc); + else + netif_dbg(efx, drv, efx->net_dev, "Updated (f) rule %08x\n", + rule->fw_id); + } + } + + /* Make sure we don't leak arbitrary bytes on the wire; + * set an all-0s ethernet header. A successful call to + * efx_gen_encap_header() will overwrite this. + */ + memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr)); + encap->encap_hdr_len = ETH_HLEN; + + if (encap->neigh) { + read_lock_bh(&encap->neigh->lock); + efx_gen_encap_header(efx, encap); + read_unlock_bh(&encap->neigh->lock); + } else { + encap->n_valid = false; + } + + rc = efx_mae_update_encap_md(efx, encap); + if (rc) { + netif_err(efx, drv, efx->net_dev, + "Failed to update encap hdr %08x rc %d\n", + encap->fw_id, rc); + return; + } + netif_dbg(efx, drv, efx->net_dev, "Updated encap hdr %08x\n", + encap->fw_id); + if (!encap->n_valid) + return; + /* Update rule users: use the action if they are now ready */ + list_for_each_entry(act, &encap->users, encap_user) { + acts = act->user; + if (WARN_ON(!acts)) /* can't happen */ + continue; + rule = container_of(acts, struct efx_tc_flow_rule, acts); + if (!efx_tc_check_ready(efx, rule)) + continue; + rc = efx_mae_update_rule(efx, acts->fw_id, rule->fw_id); + if (rc) + netif_err(efx, drv, efx->net_dev, + "Failed to update rule %08x rc %d\n", + rule->fw_id, rc); + else + netif_dbg(efx, drv, efx->net_dev, "Updated rule %08x\n", + rule->fw_id); + } +} + +static void efx_neigh_update(struct work_struct *work) +{ + struct efx_neigh_binder *neigh = container_of(work, struct efx_neigh_binder, work); + struct efx_tc_encap_action *encap; + struct efx_nic *efx = neigh->efx; + + mutex_lock(&efx->tc->mutex); + list_for_each_entry(encap, &neigh->users, list) + efx_tc_update_encap(neigh->efx, encap); + /* release ref taken in efx_neigh_event() */ + if (refcount_dec_and_test(&neigh->ref)) + efx_free_neigh(neigh); + mutex_unlock(&efx->tc->mutex); +} + +static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n) +{ + struct efx_neigh_binder keys = {NULL}, *neigh; + bool n_valid, ipv6 = false; + char ha[ETH_ALEN]; + size_t keysize; + + if (WARN_ON(!efx->tc)) + return NOTIFY_DONE; + + if (n->tbl == &arp_tbl) { + keysize = sizeof(keys.dst_ip); +#if IS_ENABLED(CONFIG_IPV6) + } else if (n->tbl == ipv6_stub->nd_tbl) { + ipv6 = true; + keysize = sizeof(keys.dst_ip6); +#endif + } else { + return NOTIFY_DONE; + } + if (!n->parms) { + netif_warn(efx, drv, efx->net_dev, "neigh_event with no parms!\n"); + return NOTIFY_DONE; + } + keys.net = read_pnet(&n->parms->net); + if (n->tbl->key_len != keysize) { + netif_warn(efx, drv, efx->net_dev, "neigh_event with bad key_len %u\n", + n->tbl->key_len); + return NOTIFY_DONE; + } + read_lock_bh(&n->lock); /* Get a consistent view */ + memcpy(ha, n->ha, ETH_ALEN); + n_valid = (n->nud_state & NUD_VALID) && !n->dead; + read_unlock_bh(&n->lock); + if (ipv6) + memcpy(&keys.dst_ip6, n->primary_key, n->tbl->key_len); + else + memcpy(&keys.dst_ip, n->primary_key, n->tbl->key_len); + rcu_read_lock(); + neigh = rhashtable_lookup_fast(&efx->tc->neigh_ht, &keys, + efx_neigh_ht_params); + if (!neigh || neigh->dying) + /* We're not interested in this neighbour */ + goto done; + write_lock_bh(&neigh->lock); + if (n_valid == neigh->n_valid && !memcmp(ha, neigh->ha, ETH_ALEN)) { + write_unlock_bh(&neigh->lock); + /* Nothing has changed; no work to do */ + goto done; + } + neigh->n_valid = n_valid; + memcpy(neigh->ha, ha, ETH_ALEN); + write_unlock_bh(&neigh->lock); + if (refcount_inc_not_zero(&neigh->ref)) { + rcu_read_unlock(); + if (!schedule_work(&neigh->work)) + /* failed to schedule, release the ref we just took */ + if (refcount_dec_and_test(&neigh->ref)) + efx_free_neigh(neigh); + } else { +done: + rcu_read_unlock(); + } + return NOTIFY_DONE; +} + +bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule) +{ + struct efx_tc_action_set *act; + + /* Encap actions can only be offloaded if they have valid + * neighbour info for the outer Ethernet header. + */ + list_for_each_entry(act, &rule->acts.list, list) + if (act->encap_md && !act->encap_md->n_valid) + return false; + return true; +} + +struct efx_tc_encap_action *efx_tc_flower_create_encap_md( + struct efx_nic *efx, const struct ip_tunnel_info *info, + struct net_device *egdev, struct netlink_ext_ack *extack) +{ + enum efx_encap_type type = efx_tc_indr_netdev_type(egdev); + struct efx_tc_encap_action *encap, *old; + struct efx_rep *to_efv; + s64 rc; + + if (type == EFX_ENCAP_TYPE_NONE) { + /* dest is not an encap device */ + NL_SET_ERR_MSG_MOD(extack, "Not a (supported) tunnel device but tunnel_key is set"); + return ERR_PTR(-EOPNOTSUPP); + } + rc = efx_mae_check_encap_type_supported(efx, type); + if (rc < 0) { + NL_SET_ERR_MSG_MOD(extack, "Firmware reports no support for this tunnel type"); + return ERR_PTR(rc); + } + /* No support yet for Geneve options */ + if (info->options_len) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel options"); + return ERR_PTR(-EOPNOTSUPP); + } + switch (info->mode) { + case IP_TUNNEL_INFO_TX: + break; + case IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6: + type |= EFX_ENCAP_FLAG_IPV6; + break; + default: + NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported tunnel mode %u", + info->mode); + return ERR_PTR(-EOPNOTSUPP); + } + encap = kzalloc(sizeof(*encap), GFP_KERNEL_ACCOUNT); + if (!encap) + return ERR_PTR(-ENOMEM); + encap->type = type; + encap->key = info->key; + INIT_LIST_HEAD(&encap->users); + old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_ht, + &encap->linkage, + efx_tc_encap_ht_params); + if (old) { + /* don't need our new entry */ + kfree(encap); + if (!refcount_inc_not_zero(&old->ref)) + return ERR_PTR(-EAGAIN); + /* existing entry found, ref taken */ + return old; + } + + rc = efx_bind_neigh(efx, encap, dev_net(egdev), extack); + if (rc < 0) + goto out_remove; + to_efv = efx_tc_flower_lookup_efv(efx, encap->neigh->egdev); + if (IS_ERR(to_efv)) { + /* neigh->egdev isn't ours */ + NL_SET_ERR_MSG_MOD(extack, "Tunnel egress device not on switch"); + rc = PTR_ERR(to_efv); + goto out_release; + } + rc = efx_tc_flower_external_mport(efx, to_efv); + if (rc < 0) { + NL_SET_ERR_MSG_MOD(extack, "Failed to identify tunnel egress m-port"); + goto out_release; + } + encap->dest_mport = rc; + read_lock_bh(&encap->neigh->lock); + efx_gen_encap_header(efx, encap); + read_unlock_bh(&encap->neigh->lock); + + rc = efx_mae_allocate_encap_md(efx, encap); + if (rc < 0) { + NL_SET_ERR_MSG_MOD(extack, "Failed to write tunnel header to hw"); + goto out_release; + } + + /* ref and return */ + refcount_set(&encap->ref, 1); + return encap; +out_release: + efx_release_neigh(efx, encap); +out_remove: + rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage, + efx_tc_encap_ht_params); + kfree(encap); + return ERR_PTR(rc); +} + +void efx_tc_flower_release_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + if (!refcount_dec_and_test(&encap->ref)) + return; /* still in use */ + efx_release_neigh(efx, encap); + rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage, + efx_tc_encap_ht_params); + efx_mae_free_encap_md(efx, encap); + kfree(encap); +} + +static void efx_tc_remove_neigh_users(struct efx_nic *efx, struct efx_neigh_binder *neigh) +{ + struct efx_tc_encap_action *encap, *next; + + list_for_each_entry_safe(encap, next, &neigh->users, list) { + /* Should cause neigh usage count to fall to zero, freeing it */ + efx_release_neigh(efx, encap); + /* The encap has lost its neigh, so it's now unready */ + efx_tc_update_encap(efx, encap); + } +} + +void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev) +{ + struct efx_neigh_binder *neigh; + struct rhashtable_iter walk; + + mutex_lock(&efx->tc->mutex); + rhashtable_walk_enter(&efx->tc->neigh_ht, &walk); + rhashtable_walk_start(&walk); + while ((neigh = rhashtable_walk_next(&walk)) != NULL) { + if (IS_ERR(neigh)) + continue; + if (neigh->egdev != net_dev) + continue; + neigh->dying = true; + rhashtable_walk_stop(&walk); + synchronize_rcu(); /* Make sure any updates see dying flag */ + efx_tc_remove_neigh_users(efx, neigh); /* might sleep */ + rhashtable_walk_start(&walk); + } + rhashtable_walk_stop(&walk); + rhashtable_walk_exit(&walk); + mutex_unlock(&efx->tc->mutex); +} + +int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event, + void *ptr) +{ + if (efx->type->is_vf) + return NOTIFY_DONE; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + return efx_neigh_event(efx, ptr); + default: + return NOTIFY_DONE; + } +} diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.h b/drivers/net/ethernet/sfc/tc_encap_actions.h new file mode 100644 index 000000000000..c3c7904ad7ff --- /dev/null +++ b/drivers/net/ethernet/sfc/tc_encap_actions.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2023, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_TC_ENCAP_ACTIONS_H +#define EFX_TC_ENCAP_ACTIONS_H +#include "net_driver.h" + +#if IS_ENABLED(CONFIG_SFC_SRIOV) +#include <linux/refcount.h> +#include <net/tc_act/tc_tunnel_key.h> + +/** + * struct efx_neigh_binder - driver state for a neighbour entry + * @net: the network namespace in which this neigh resides + * @dst_ip: the IPv4 destination address resolved by this neigh + * @dst_ip6: the IPv6 destination address resolved by this neigh + * @ha: the hardware (Ethernet) address of the neighbour + * @n_valid: true if the neighbour is in NUD_VALID state + * @lock: protects @ha and @n_valid + * @ttl: Time To Live associated with the route used + * @dying: set when egdev is going away, to skip further updates + * @egdev: egress device from the route lookup. Holds a reference + * @dev_tracker: reference tracker entry for @egdev + * @ns_tracker: reference tracker entry for @ns + * @ref: counts encap actions referencing this entry + * @used: jiffies of last time traffic hit any encap action using this. + * When counter reads update this, a new neighbour event is sent to + * indicate that the neighbour entry is still in use. + * @users: list of &struct efx_tc_encap_action + * @linkage: entry in efx->neigh_ht (keys are @net, @dst_ip, @dst_ip6). + * @work: processes neighbour state changes, updates the encap actions + * @efx: owning NIC instance. + * + * Associates a neighbour entry with the encap actions that are + * interested in it, allowing the latter to be updated when the + * neighbour details change. + * Whichever of @dst_ip and @dst_ip6 is not in use will be all-zeroes, + * this distinguishes IPv4 from IPv6 entries. + */ +struct efx_neigh_binder { + struct net *net; + __be32 dst_ip; + struct in6_addr dst_ip6; + char ha[ETH_ALEN]; + bool n_valid; + rwlock_t lock; + u8 ttl; + bool dying; + struct net_device *egdev; + netdevice_tracker dev_tracker; + netns_tracker ns_tracker; + refcount_t ref; + unsigned long used; + struct list_head users; + struct rhash_head linkage; + struct work_struct work; + struct efx_nic *efx; +}; + +/* This limit is arbitrary; current hardware (SN1022) handles encap headers + * of up to 126 bytes, but that limit is not enshrined in the MCDI protocol. + */ +#define EFX_TC_MAX_ENCAP_HDR 126 +struct efx_tc_encap_action { + enum efx_encap_type type; + struct ip_tunnel_key key; /* 52 bytes */ + u32 dest_mport; /* is copied into struct efx_tc_action_set */ + u8 encap_hdr_len; + bool n_valid; + u8 encap_hdr[EFX_TC_MAX_ENCAP_HDR]; + struct efx_neigh_binder *neigh; + struct list_head list; /* entry on neigh->users list */ + struct list_head users; /* action sets using this encap_md */ + struct rhash_head linkage; /* efx->tc_encap_ht */ + refcount_t ref; + u32 fw_id; /* index of this entry in firmware encap table */ +}; + +/* create/uncreate/teardown hashtables */ +int efx_tc_init_encap_actions(struct efx_nic *efx); +void efx_tc_destroy_encap_actions(struct efx_nic *efx); +void efx_tc_fini_encap_actions(struct efx_nic *efx); + +struct efx_tc_flow_rule; +bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule); + +struct efx_tc_encap_action *efx_tc_flower_create_encap_md( + struct efx_nic *efx, const struct ip_tunnel_info *info, + struct net_device *egdev, struct netlink_ext_ack *extack); +void efx_tc_flower_release_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap); + +void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev); +int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event, + void *ptr); + +#else /* CONFIG_SFC_SRIOV */ + +static inline int efx_tc_netevent_event(struct efx_nic *efx, + unsigned long event, void *ptr) +{ + return NOTIFY_DONE; +} + +#endif /* CONFIG_SFC_SRIOV */ + +#endif /* EFX_TC_ENCAP_ACTIONS_H */ diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c index 755aa92bf823..9f2393d34371 100644 --- a/drivers/net/ethernet/sfc/tx_common.c +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -12,6 +12,7 @@ #include "efx.h" #include "nic_common.h" #include "tx_common.h" +#include <net/gso.h> static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) { diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 2d7347b71c41..0dcd6a568b06 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1851,6 +1851,17 @@ static int netsec_of_probe(struct platform_device *pdev, return err; } + /* + * SynQuacer is physically configured with TX and RX delays + * but the standard firmware claimed otherwise for a long + * time, ignore it. + */ + if (of_machine_is_compatible("socionext,developer-box") && + priv->phy_interface != PHY_INTERFACE_MODE_RGMII_ID) { + dev_warn(&pdev->dev, "Outdated firmware reports incorrect PHY mode, overriding\n"); + priv->phy_interface = PHY_INTERFACE_MODE_RGMII_ID; + } + priv->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); if (!priv->phy_np) { dev_err(&pdev->dev, "missing required property 'phy-handle'\n"); diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 5f5a997f21f3..5583f0b055ec 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -158,6 +158,9 @@ config DWMAC_SOCFPGA default ARCH_INTEL_SOCFPGA depends on OF && (ARCH_INTEL_SOCFPGA || COMPILE_TEST) select MFD_SYSCON + select MDIO_REGMAP + select REGMAP_MMIO + select PCS_LYNX help Support for ethernet controller on Altera SOCFPGA diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 8738fdbb4b2d..7dd3d388068b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -35,7 +35,7 @@ obj-$(CONFIG_DWMAC_IMX8) += dwmac-imx.o obj-$(CONFIG_DWMAC_TEGRA) += dwmac-tegra.o obj-$(CONFIG_DWMAC_VISCONTI) += dwmac-visconti.o stmmac-platform-objs:= stmmac_platform.o -dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o +dwmac-altr-socfpga-objs := dwmac-socfpga.o obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o obj-$(CONFIG_DWMAC_INTEL) += dwmac-intel.o diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c deleted file mode 100644 index 00f6d347eaf7..000000000000 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c +++ /dev/null @@ -1,257 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright Altera Corporation (C) 2016. All rights reserved. - * - * Author: Tien Hock Loh <thloh@altera.com> - */ - -#include <linux/mfd/syscon.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_net.h> -#include <linux/phy.h> -#include <linux/regmap.h> -#include <linux/reset.h> -#include <linux/stmmac.h> - -#include "stmmac.h" -#include "stmmac_platform.h" -#include "altr_tse_pcs.h" - -#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0 -#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII BIT(1) -#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII BIT(2) -#define SYSMGR_EMACGRP_CTRL_PHYSEL_WIDTH 2 -#define SYSMGR_EMACGRP_CTRL_PHYSEL_MASK GENMASK(1, 0) - -#define TSE_PCS_CONTROL_AN_EN_MASK BIT(12) -#define TSE_PCS_CONTROL_REG 0x00 -#define TSE_PCS_CONTROL_RESTART_AN_MASK BIT(9) -#define TSE_PCS_CTRL_AUTONEG_SGMII 0x1140 -#define TSE_PCS_IF_MODE_REG 0x28 -#define TSE_PCS_LINK_TIMER_0_REG 0x24 -#define TSE_PCS_LINK_TIMER_1_REG 0x26 -#define TSE_PCS_SIZE 0x40 -#define TSE_PCS_STATUS_AN_COMPLETED_MASK BIT(5) -#define TSE_PCS_STATUS_LINK_MASK 0x0004 -#define TSE_PCS_STATUS_REG 0x02 -#define TSE_PCS_SGMII_SPEED_1000 BIT(3) -#define TSE_PCS_SGMII_SPEED_100 BIT(2) -#define TSE_PCS_SGMII_SPEED_10 0x0 -#define TSE_PCS_SW_RST_MASK 0x8000 -#define TSE_PCS_PARTNER_ABILITY_REG 0x0A -#define TSE_PCS_PARTNER_DUPLEX_FULL 0x1000 -#define TSE_PCS_PARTNER_DUPLEX_HALF 0x0000 -#define TSE_PCS_PARTNER_DUPLEX_MASK 0x1000 -#define TSE_PCS_PARTNER_SPEED_MASK GENMASK(11, 10) -#define TSE_PCS_PARTNER_SPEED_1000 BIT(11) -#define TSE_PCS_PARTNER_SPEED_100 BIT(10) -#define TSE_PCS_PARTNER_SPEED_10 0x0000 -#define TSE_PCS_PARTNER_SPEED_1000 BIT(11) -#define TSE_PCS_PARTNER_SPEED_100 BIT(10) -#define TSE_PCS_PARTNER_SPEED_10 0x0000 -#define TSE_PCS_SGMII_SPEED_MASK GENMASK(3, 2) -#define TSE_PCS_SGMII_LINK_TIMER_0 0x0D40 -#define TSE_PCS_SGMII_LINK_TIMER_1 0x0003 -#define TSE_PCS_SW_RESET_TIMEOUT 100 -#define TSE_PCS_USE_SGMII_AN_MASK BIT(1) -#define TSE_PCS_USE_SGMII_ENA BIT(0) -#define TSE_PCS_IF_USE_SGMII 0x03 - -#define AUTONEGO_LINK_TIMER 20 - -static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs) -{ - int counter = 0; - u16 val; - - val = readw(base + TSE_PCS_CONTROL_REG); - val |= TSE_PCS_SW_RST_MASK; - writew(val, base + TSE_PCS_CONTROL_REG); - - while (counter < TSE_PCS_SW_RESET_TIMEOUT) { - val = readw(base + TSE_PCS_CONTROL_REG); - val &= TSE_PCS_SW_RST_MASK; - if (val == 0) - break; - counter++; - udelay(1); - } - if (counter >= TSE_PCS_SW_RESET_TIMEOUT) { - dev_err(pcs->dev, "PCS could not get out of sw reset\n"); - return -ETIMEDOUT; - } - - return 0; -} - -int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs) -{ - int ret = 0; - - writew(TSE_PCS_IF_USE_SGMII, base + TSE_PCS_IF_MODE_REG); - - writew(TSE_PCS_CTRL_AUTONEG_SGMII, base + TSE_PCS_CONTROL_REG); - - writew(TSE_PCS_SGMII_LINK_TIMER_0, base + TSE_PCS_LINK_TIMER_0_REG); - writew(TSE_PCS_SGMII_LINK_TIMER_1, base + TSE_PCS_LINK_TIMER_1_REG); - - ret = tse_pcs_reset(base, pcs); - if (ret == 0) - writew(SGMII_ADAPTER_ENABLE, - pcs->sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - - return ret; -} - -static void pcs_link_timer_callback(struct tse_pcs *pcs) -{ - u16 val = 0; - void __iomem *tse_pcs_base = pcs->tse_pcs_base; - void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; - - val = readw(tse_pcs_base + TSE_PCS_STATUS_REG); - val &= TSE_PCS_STATUS_LINK_MASK; - - if (val != 0) { - dev_dbg(pcs->dev, "Adapter: Link is established\n"); - writew(SGMII_ADAPTER_ENABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - } else { - mod_timer(&pcs->aneg_link_timer, jiffies + - msecs_to_jiffies(AUTONEGO_LINK_TIMER)); - } -} - -static void auto_nego_timer_callback(struct tse_pcs *pcs) -{ - u16 val = 0; - u16 speed = 0; - u16 duplex = 0; - void __iomem *tse_pcs_base = pcs->tse_pcs_base; - void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; - - val = readw(tse_pcs_base + TSE_PCS_STATUS_REG); - val &= TSE_PCS_STATUS_AN_COMPLETED_MASK; - - if (val != 0) { - dev_dbg(pcs->dev, "Adapter: Auto Negotiation is completed\n"); - val = readw(tse_pcs_base + TSE_PCS_PARTNER_ABILITY_REG); - speed = val & TSE_PCS_PARTNER_SPEED_MASK; - duplex = val & TSE_PCS_PARTNER_DUPLEX_MASK; - - if (speed == TSE_PCS_PARTNER_SPEED_10 && - duplex == TSE_PCS_PARTNER_DUPLEX_FULL) - dev_dbg(pcs->dev, - "Adapter: Link Partner is Up - 10/Full\n"); - else if (speed == TSE_PCS_PARTNER_SPEED_100 && - duplex == TSE_PCS_PARTNER_DUPLEX_FULL) - dev_dbg(pcs->dev, - "Adapter: Link Partner is Up - 100/Full\n"); - else if (speed == TSE_PCS_PARTNER_SPEED_1000 && - duplex == TSE_PCS_PARTNER_DUPLEX_FULL) - dev_dbg(pcs->dev, - "Adapter: Link Partner is Up - 1000/Full\n"); - else if (speed == TSE_PCS_PARTNER_SPEED_10 && - duplex == TSE_PCS_PARTNER_DUPLEX_HALF) - dev_err(pcs->dev, - "Adapter does not support Half Duplex\n"); - else if (speed == TSE_PCS_PARTNER_SPEED_100 && - duplex == TSE_PCS_PARTNER_DUPLEX_HALF) - dev_err(pcs->dev, - "Adapter does not support Half Duplex\n"); - else if (speed == TSE_PCS_PARTNER_SPEED_1000 && - duplex == TSE_PCS_PARTNER_DUPLEX_HALF) - dev_err(pcs->dev, - "Adapter does not support Half Duplex\n"); - else - dev_err(pcs->dev, - "Adapter: Invalid Partner Speed and Duplex\n"); - - if (duplex == TSE_PCS_PARTNER_DUPLEX_FULL && - (speed == TSE_PCS_PARTNER_SPEED_10 || - speed == TSE_PCS_PARTNER_SPEED_100 || - speed == TSE_PCS_PARTNER_SPEED_1000)) - writew(SGMII_ADAPTER_ENABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - } else { - val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG); - val |= TSE_PCS_CONTROL_RESTART_AN_MASK; - writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG); - - tse_pcs_reset(tse_pcs_base, pcs); - mod_timer(&pcs->aneg_link_timer, jiffies + - msecs_to_jiffies(AUTONEGO_LINK_TIMER)); - } -} - -static void aneg_link_timer_callback(struct timer_list *t) -{ - struct tse_pcs *pcs = from_timer(pcs, t, aneg_link_timer); - - if (pcs->autoneg == AUTONEG_ENABLE) - auto_nego_timer_callback(pcs); - else if (pcs->autoneg == AUTONEG_DISABLE) - pcs_link_timer_callback(pcs); -} - -void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, - unsigned int speed) -{ - void __iomem *tse_pcs_base = pcs->tse_pcs_base; - u32 val; - - pcs->autoneg = phy_dev->autoneg; - - if (phy_dev->autoneg == AUTONEG_ENABLE) { - val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG); - val |= TSE_PCS_CONTROL_AN_EN_MASK; - writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG); - - val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG); - val |= TSE_PCS_USE_SGMII_AN_MASK; - writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG); - - val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG); - val |= TSE_PCS_CONTROL_RESTART_AN_MASK; - - tse_pcs_reset(tse_pcs_base, pcs); - - timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback, - 0); - mod_timer(&pcs->aneg_link_timer, jiffies + - msecs_to_jiffies(AUTONEGO_LINK_TIMER)); - } else if (phy_dev->autoneg == AUTONEG_DISABLE) { - val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG); - val &= ~TSE_PCS_CONTROL_AN_EN_MASK; - writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG); - - val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG); - val &= ~TSE_PCS_USE_SGMII_AN_MASK; - writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG); - - val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG); - val &= ~TSE_PCS_SGMII_SPEED_MASK; - - switch (speed) { - case 1000: - val |= TSE_PCS_SGMII_SPEED_1000; - break; - case 100: - val |= TSE_PCS_SGMII_SPEED_100; - break; - case 10: - val |= TSE_PCS_SGMII_SPEED_10; - break; - default: - return; - } - writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG); - - tse_pcs_reset(tse_pcs_base, pcs); - - timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback, - 0); - mod_timer(&pcs->aneg_link_timer, jiffies + - msecs_to_jiffies(AUTONEGO_LINK_TIMER)); - } -} diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h deleted file mode 100644 index 694ac25ef426..000000000000 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright Altera Corporation (C) 2016. All rights reserved. - * - * Author: Tien Hock Loh <thloh@altera.com> - */ - -#ifndef __TSE_PCS_H__ -#define __TSE_PCS_H__ - -#include <linux/phy.h> -#include <linux/timer.h> - -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_ENABLE 0x0000 -#define SGMII_ADAPTER_DISABLE 0x0001 - -struct tse_pcs { - struct device *dev; - void __iomem *tse_pcs_base; - void __iomem *sgmii_adapter_base; - struct timer_list aneg_link_timer; - int autoneg; -}; - -int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs); -void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, - unsigned int speed); - -#endif /* __TSE_PCS_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 4ad692c4116c..16e67c18b6f7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -519,6 +519,7 @@ struct mac_device_info { const struct stmmac_tc_ops *tc; const struct stmmac_mmc_ops *mmc; struct dw_xpcs *xpcs; + struct phylink_pcs *lynx_pcs; /* Lynx external PCS */ struct mii_regs mii; /* MII register Addresses */ struct mac_link link; void __iomem *pcsr; /* vpointer to device CSRs */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c index 9354bf419112..58a7f08e8d78 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c @@ -141,7 +141,7 @@ MODULE_DEVICE_TABLE(of, anarion_dwmac_match); static struct platform_driver anarion_dwmac_driver = { .probe = anarion_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "anarion-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 18acf7dd74e5..9f88530c5e8c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -464,7 +464,7 @@ remove_config: return ret; } -static int dwc_eth_dwmac_remove(struct platform_device *pdev) +static void dwc_eth_dwmac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); @@ -477,8 +477,6 @@ static int dwc_eth_dwmac_remove(struct platform_device *pdev) data->remove(pdev); stmmac_remove_config_dt(pdev, priv->plat); - - return 0; } static const struct of_device_id dwc_eth_dwmac_match[] = { @@ -490,7 +488,7 @@ MODULE_DEVICE_TABLE(of, dwc_eth_dwmac_match); static struct platform_driver dwc_eth_dwmac_driver = { .probe = dwc_eth_dwmac_probe, - .remove = dwc_eth_dwmac_remove, + .remove_new = dwc_eth_dwmac_remove, .driver = { .name = "dwc-eth-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index ef8f3a940938..20fc455b3337 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -46,22 +46,12 @@ static int dwmac_generic_probe(struct platform_device *pdev) plat_dat->unicast_filter_entries = 1; } - /* Custom initialisation (if needed) */ - if (plat_dat->init) { - ret = plat_dat->init(pdev, plat_dat->bsp_priv); - if (ret) - goto err_remove_config_dt; - } - - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + ret = stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); if (ret) - goto err_exit; + goto err_remove_config_dt; return 0; -err_exit: - if (plat_dat->exit) - plat_dat->exit(pdev, plat_dat->bsp_priv); err_remove_config_dt: if (pdev->dev.of_node) stmmac_remove_config_dt(pdev, plat_dat); @@ -87,7 +77,7 @@ MODULE_DEVICE_TABLE(of, dwmac_generic_match); static struct platform_driver dwmac_generic_driver = { .probe = dwmac_generic_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = STMMAC_RESOURCE_NAME, .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 7c228bd0d099..b9378a63f0e8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -376,7 +376,7 @@ MODULE_DEVICE_TABLE(of, imx_dwmac_match); static struct platform_driver imx_dwmac_driver = { .probe = imx_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "imx-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 378b4dd826bb..8063ba1c3ce8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -386,7 +386,7 @@ MODULE_DEVICE_TABLE(of, ingenic_mac_of_matches); static struct platform_driver ingenic_mac_driver = { .probe = ingenic_mac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "ingenic-mac", .pm = pm_ptr(&ingenic_mac_pm_ops), diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index 06d287f104be..a5e639ab0b9e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -169,20 +169,17 @@ err_remove_config_dt: return ret; } -static int intel_eth_plat_remove(struct platform_device *pdev) +static void intel_eth_plat_remove(struct platform_device *pdev) { struct intel_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); - int ret; - ret = stmmac_pltfr_remove(pdev); + stmmac_pltfr_remove(pdev); clk_disable_unprepare(dwmac->tx_clk); - - return ret; } static struct platform_driver intel_eth_plat_driver = { .probe = intel_eth_plat_probe, - .remove = intel_eth_plat_remove, + .remove_new = intel_eth_plat_remove, .driver = { .name = "intel-eth-plat", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index e888c8a9c830..e39406df8516 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -498,7 +498,7 @@ MODULE_DEVICE_TABLE(of, ipq806x_gmac_dwmac_match); static struct platform_driver ipq806x_gmac_dwmac_driver = { .probe = ipq806x_gmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "ipq806x-gmac-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c index 9d77c647badd..18e84ba693a6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c @@ -83,7 +83,7 @@ MODULE_DEVICE_TABLE(of, lpc18xx_dwmac_match); static struct platform_driver lpc18xx_dwmac_driver = { .probe = lpc18xx_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "lpc18xx-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 9ae31e3dc821..73c1dfa7ecb1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -678,15 +678,12 @@ err_remove_config_dt: return ret; } -static int mediatek_dwmac_remove(struct platform_device *pdev) +static void mediatek_dwmac_remove(struct platform_device *pdev) { struct mediatek_dwmac_plat_data *priv_plat = get_stmmac_bsp_priv(&pdev->dev); - int ret; - ret = stmmac_pltfr_remove(pdev); + stmmac_pltfr_remove(pdev); mediatek_dwmac_clks_config(priv_plat, false); - - return ret; } static const struct of_device_id mediatek_dwmac_match[] = { @@ -701,7 +698,7 @@ MODULE_DEVICE_TABLE(of, mediatek_dwmac_match); static struct platform_driver mediatek_dwmac_driver = { .probe = mediatek_dwmac_probe, - .remove = mediatek_dwmac_remove, + .remove_new = mediatek_dwmac_remove, .driver = { .name = "dwmac-mediatek", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c index 16fb66a0ca72..7aa5e6bc04eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c @@ -91,7 +91,7 @@ MODULE_DEVICE_TABLE(of, meson6_dwmac_match); static struct platform_driver meson6_dwmac_driver = { .probe = meson6_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "meson6-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index f6754e3643f3..92b16048f91c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -539,7 +539,7 @@ MODULE_DEVICE_TABLE(of, meson8b_dwmac_match); static struct platform_driver meson8b_dwmac_driver = { .probe = meson8b_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "meson8b-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c index 62a69a91ab22..42954020de2c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -231,7 +231,7 @@ MODULE_DEVICE_TABLE(of, oxnas_dwmac_match); static struct platform_driver oxnas_dwmac_driver = { .probe = oxnas_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "oxnas-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index f07905f00f98..e62940414e54 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -6,6 +6,9 @@ #include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/phy.h> +#include <linux/phy/phy.h> +#include <linux/property.h> + #include "stmmac.h" #include "stmmac_platform.h" @@ -72,6 +75,10 @@ #define RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL BIT(6) #define RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN BIT(5) +/* MAC_CTRL_REG bits */ +#define ETHQOS_MAC_CTRL_SPEED_MODE BIT(14) +#define ETHQOS_MAC_CTRL_PORT_SEL BIT(15) + struct ethqos_emac_por { unsigned int offset; unsigned int value; @@ -81,22 +88,28 @@ struct ethqos_emac_driver_data { const struct ethqos_emac_por *por; unsigned int num_por; bool rgmii_config_loopback_en; - bool has_emac3; + bool has_emac_ge_3; + const char *link_clk_name; + bool has_integrated_pcs; struct dwmac4_addrs dwmac4_addrs; }; struct qcom_ethqos { struct platform_device *pdev; void __iomem *rgmii_base; + void __iomem *mac_base; + int (*configure_func)(struct qcom_ethqos *ethqos); - unsigned int rgmii_clk_rate; - struct clk *rgmii_clk; + unsigned int link_clk_rate; + struct clk *link_clk; + struct phy *serdes_phy; unsigned int speed; + int phy_mode; const struct ethqos_emac_por *por; unsigned int num_por; bool rgmii_config_loopback_en; - bool has_emac3; + bool has_emac_ge_3; }; static int rgmii_readl(struct qcom_ethqos *ethqos, unsigned int offset) @@ -115,7 +128,7 @@ static void rgmii_updatel(struct qcom_ethqos *ethqos, { unsigned int temp; - temp = rgmii_readl(ethqos, offset); + temp = rgmii_readl(ethqos, offset); temp = (temp & ~(mask)) | val; rgmii_writel(ethqos, temp, offset); } @@ -123,25 +136,26 @@ static void rgmii_updatel(struct qcom_ethqos *ethqos, static void rgmii_dump(void *priv) { struct qcom_ethqos *ethqos = priv; + struct device *dev = ðqos->pdev->dev; - dev_dbg(ðqos->pdev->dev, "Rgmii register dump\n"); - dev_dbg(ðqos->pdev->dev, "RGMII_IO_MACRO_CONFIG: %x\n", + dev_dbg(dev, "Rgmii register dump\n"); + dev_dbg(dev, "RGMII_IO_MACRO_CONFIG: %x\n", rgmii_readl(ethqos, RGMII_IO_MACRO_CONFIG)); - dev_dbg(ðqos->pdev->dev, "SDCC_HC_REG_DLL_CONFIG: %x\n", + dev_dbg(dev, "SDCC_HC_REG_DLL_CONFIG: %x\n", rgmii_readl(ethqos, SDCC_HC_REG_DLL_CONFIG)); - dev_dbg(ðqos->pdev->dev, "SDCC_HC_REG_DDR_CONFIG: %x\n", + dev_dbg(dev, "SDCC_HC_REG_DDR_CONFIG: %x\n", rgmii_readl(ethqos, SDCC_HC_REG_DDR_CONFIG)); - dev_dbg(ðqos->pdev->dev, "SDCC_HC_REG_DLL_CONFIG2: %x\n", + dev_dbg(dev, "SDCC_HC_REG_DLL_CONFIG2: %x\n", rgmii_readl(ethqos, SDCC_HC_REG_DLL_CONFIG2)); - dev_dbg(ðqos->pdev->dev, "SDC4_STATUS: %x\n", + dev_dbg(dev, "SDC4_STATUS: %x\n", rgmii_readl(ethqos, SDC4_STATUS)); - dev_dbg(ðqos->pdev->dev, "SDCC_USR_CTL: %x\n", + dev_dbg(dev, "SDCC_USR_CTL: %x\n", rgmii_readl(ethqos, SDCC_USR_CTL)); - dev_dbg(ðqos->pdev->dev, "RGMII_IO_MACRO_CONFIG2: %x\n", + dev_dbg(dev, "RGMII_IO_MACRO_CONFIG2: %x\n", rgmii_readl(ethqos, RGMII_IO_MACRO_CONFIG2)); - dev_dbg(ðqos->pdev->dev, "RGMII_IO_MACRO_DEBUG1: %x\n", + dev_dbg(dev, "RGMII_IO_MACRO_DEBUG1: %x\n", rgmii_readl(ethqos, RGMII_IO_MACRO_DEBUG1)); - dev_dbg(ðqos->pdev->dev, "EMAC_SYSTEM_LOW_POWER_DEBUG: %x\n", + dev_dbg(dev, "EMAC_SYSTEM_LOW_POWER_DEBUG: %x\n", rgmii_readl(ethqos, EMAC_SYSTEM_LOW_POWER_DEBUG)); } @@ -151,23 +165,23 @@ static void rgmii_dump(void *priv) #define RGMII_ID_MODE_10_LOW_SVS_CLK_FREQ (5 * 1000 * 1000UL) static void -ethqos_update_rgmii_clk(struct qcom_ethqos *ethqos, unsigned int speed) +ethqos_update_link_clk(struct qcom_ethqos *ethqos, unsigned int speed) { switch (speed) { case SPEED_1000: - ethqos->rgmii_clk_rate = RGMII_1000_NOM_CLK_FREQ; + ethqos->link_clk_rate = RGMII_1000_NOM_CLK_FREQ; break; case SPEED_100: - ethqos->rgmii_clk_rate = RGMII_ID_MODE_100_LOW_SVS_CLK_FREQ; + ethqos->link_clk_rate = RGMII_ID_MODE_100_LOW_SVS_CLK_FREQ; break; case SPEED_10: - ethqos->rgmii_clk_rate = RGMII_ID_MODE_10_LOW_SVS_CLK_FREQ; + ethqos->link_clk_rate = RGMII_ID_MODE_10_LOW_SVS_CLK_FREQ; break; } - clk_set_rate(ethqos->rgmii_clk, ethqos->rgmii_clk_rate); + clk_set_rate(ethqos->link_clk, ethqos->link_clk_rate); } static void ethqos_set_func_clk_en(struct qcom_ethqos *ethqos) @@ -189,7 +203,7 @@ static const struct ethqos_emac_driver_data emac_v2_3_0_data = { .por = emac_v2_3_0_por, .num_por = ARRAY_SIZE(emac_v2_3_0_por), .rgmii_config_loopback_en = true, - .has_emac3 = false, + .has_emac_ge_3 = false, }; static const struct ethqos_emac_por emac_v2_1_0_por[] = { @@ -205,7 +219,7 @@ static const struct ethqos_emac_driver_data emac_v2_1_0_data = { .por = emac_v2_1_0_por, .num_por = ARRAY_SIZE(emac_v2_1_0_por), .rgmii_config_loopback_en = false, - .has_emac3 = false, + .has_emac_ge_3 = false, }; static const struct ethqos_emac_por emac_v3_0_0_por[] = { @@ -221,7 +235,41 @@ static const struct ethqos_emac_driver_data emac_v3_0_0_data = { .por = emac_v3_0_0_por, .num_por = ARRAY_SIZE(emac_v3_0_0_por), .rgmii_config_loopback_en = false, - .has_emac3 = true, + .has_emac_ge_3 = true, + .dwmac4_addrs = { + .dma_chan = 0x00008100, + .dma_chan_offset = 0x1000, + .mtl_chan = 0x00008000, + .mtl_chan_offset = 0x1000, + .mtl_ets_ctrl = 0x00008010, + .mtl_ets_ctrl_offset = 0x1000, + .mtl_txq_weight = 0x00008018, + .mtl_txq_weight_offset = 0x1000, + .mtl_send_slp_cred = 0x0000801c, + .mtl_send_slp_cred_offset = 0x1000, + .mtl_high_cred = 0x00008020, + .mtl_high_cred_offset = 0x1000, + .mtl_low_cred = 0x00008024, + .mtl_low_cred_offset = 0x1000, + }, +}; + +static const struct ethqos_emac_por emac_v4_0_0_por[] = { + { .offset = RGMII_IO_MACRO_CONFIG, .value = 0x40c01343 }, + { .offset = SDCC_HC_REG_DLL_CONFIG, .value = 0x2004642c }, + { .offset = SDCC_HC_REG_DDR_CONFIG, .value = 0x80040800 }, + { .offset = SDCC_HC_REG_DLL_CONFIG2, .value = 0x00200000 }, + { .offset = SDCC_USR_CTL, .value = 0x00010800 }, + { .offset = RGMII_IO_MACRO_CONFIG2, .value = 0x00002060 }, +}; + +static const struct ethqos_emac_driver_data emac_v4_0_0_data = { + .por = emac_v4_0_0_por, + .num_por = ARRAY_SIZE(emac_v3_0_0_por), + .rgmii_config_loopback_en = false, + .has_emac_ge_3 = true, + .link_clk_name = "phyaux", + .has_integrated_pcs = true, .dwmac4_addrs = { .dma_chan = 0x00008100, .dma_chan_offset = 0x1000, @@ -242,6 +290,7 @@ static const struct ethqos_emac_driver_data emac_v3_0_0_data = { static int ethqos_dll_configure(struct qcom_ethqos *ethqos) { + struct device *dev = ðqos->pdev->dev; unsigned int val; int retry = 1000; @@ -261,7 +310,7 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos) rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_EN, SDCC_DLL_CONFIG_DLL_EN, SDCC_HC_REG_DLL_CONFIG); - if (!ethqos->has_emac3) { + if (!ethqos->has_emac_ge_3) { rgmii_updatel(ethqos, SDCC_DLL_MCLK_GATING_EN, 0, SDCC_HC_REG_DLL_CONFIG); @@ -279,7 +328,7 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos) retry--; } while (retry > 0); if (!retry) - dev_err(ðqos->pdev->dev, "Clear CK_OUT_EN timedout\n"); + dev_err(dev, "Clear CK_OUT_EN timedout\n"); /* Set CK_OUT_EN */ rgmii_updatel(ethqos, SDCC_DLL_CONFIG_CK_OUT_EN, @@ -296,13 +345,13 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos) retry--; } while (retry > 0); if (!retry) - dev_err(ðqos->pdev->dev, "Set CK_OUT_EN timedout\n"); + dev_err(dev, "Set CK_OUT_EN timedout\n"); /* Set DDR_CAL_EN */ rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_CAL_EN, SDCC_DLL_CONFIG2_DDR_CAL_EN, SDCC_HC_REG_DLL_CONFIG2); - if (!ethqos->has_emac3) { + if (!ethqos->has_emac_ge_3) { rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DLL_CLOCK_DIS, 0, SDCC_HC_REG_DLL_CONFIG2); @@ -322,14 +371,13 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos) static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) { + struct device *dev = ðqos->pdev->dev; int phase_shift; - int phy_mode; int loopback; /* Determine if the PHY adds a 2 ns TX delay or the MAC handles it */ - phy_mode = device_get_phy_mode(ðqos->pdev->dev); - if (phy_mode == PHY_INTERFACE_MODE_RGMII_ID || - phy_mode == PHY_INTERFACE_MODE_RGMII_TXID) + if (ethqos->phy_mode == PHY_INTERFACE_MODE_RGMII_ID || + ethqos->phy_mode == PHY_INTERFACE_MODE_RGMII_TXID) phase_shift = 0; else phase_shift = RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN; @@ -373,7 +421,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) /* PRG_RCLK_DLY = TCXO period * TCXO_CYCLES_CNT / 2 * RX delay ns, * in practice this becomes PRG_RCLK_DLY = 52 * 4 / 2 * RX delay ns */ - if (ethqos->has_emac3) { + if (ethqos->has_emac_ge_3) { /* 0.9 ns */ rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_RCLK_DLY, 115, SDCC_HC_REG_DDR_CONFIG); @@ -408,7 +456,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) rgmii_updatel(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, 0, RGMII_IO_MACRO_CONFIG2); - if (ethqos->has_emac3) + if (ethqos->has_emac_ge_3) rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, RGMII_CONFIG2_RX_PROG_SWAP, RGMII_IO_MACRO_CONFIG2); @@ -448,7 +496,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) RGMII_IO_MACRO_CONFIG); rgmii_updatel(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, 0, RGMII_IO_MACRO_CONFIG2); - if (ethqos->has_emac3) + if (ethqos->has_emac_ge_3) rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, RGMII_CONFIG2_RX_PROG_SWAP, RGMII_IO_MACRO_CONFIG2); @@ -468,16 +516,16 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) loopback, RGMII_IO_MACRO_CONFIG); break; default: - dev_err(ðqos->pdev->dev, - "Invalid speed %d\n", ethqos->speed); + dev_err(dev, "Invalid speed %d\n", ethqos->speed); return -EINVAL; } return 0; } -static int ethqos_configure(struct qcom_ethqos *ethqos) +static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos) { + struct device *dev = ðqos->pdev->dev; volatile unsigned int dll_lock; unsigned int i, retry = 1000; @@ -497,7 +545,7 @@ static int ethqos_configure(struct qcom_ethqos *ethqos) rgmii_updatel(ethqos, SDCC_DLL_CONFIG_PDN, SDCC_DLL_CONFIG_PDN, SDCC_HC_REG_DLL_CONFIG); - if (ethqos->has_emac3) { + if (ethqos->has_emac_ge_3) { if (ethqos->speed == SPEED_1000) { rgmii_writel(ethqos, 0x1800000, SDCC_TEST_CTL); rgmii_writel(ethqos, 0x2C010800, SDCC_USR_CTL); @@ -527,7 +575,7 @@ static int ethqos_configure(struct qcom_ethqos *ethqos) SDCC_HC_REG_DLL_CONFIG); /* Set USR_CTL bit 26 with mask of 3 bits */ - if (!ethqos->has_emac3) + if (!ethqos->has_emac_ge_3) rgmii_updatel(ethqos, GENMASK(26, 24), BIT(26), SDCC_USR_CTL); @@ -540,8 +588,7 @@ static int ethqos_configure(struct qcom_ethqos *ethqos) retry--; } while (retry > 0); if (!retry) - dev_err(ðqos->pdev->dev, - "Timeout while waiting for DLL lock\n"); + dev_err(dev, "Timeout while waiting for DLL lock\n"); } if (ethqos->speed == SPEED_1000) @@ -552,24 +599,80 @@ static int ethqos_configure(struct qcom_ethqos *ethqos) return 0; } +static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos) +{ + int val; + + val = readl(ethqos->mac_base + MAC_CTRL_REG); + + switch (ethqos->speed) { + case SPEED_1000: + val &= ~ETHQOS_MAC_CTRL_PORT_SEL; + rgmii_updatel(ethqos, RGMII_CONFIG2_RGMII_CLK_SEL_CFG, + RGMII_CONFIG2_RGMII_CLK_SEL_CFG, + RGMII_IO_MACRO_CONFIG2); + break; + case SPEED_100: + val |= ETHQOS_MAC_CTRL_PORT_SEL | ETHQOS_MAC_CTRL_SPEED_MODE; + break; + case SPEED_10: + val |= ETHQOS_MAC_CTRL_PORT_SEL; + val &= ~ETHQOS_MAC_CTRL_SPEED_MODE; + break; + } + + writel(val, ethqos->mac_base + MAC_CTRL_REG); + + return val; +} + +static int ethqos_configure(struct qcom_ethqos *ethqos) +{ + return ethqos->configure_func(ethqos); +} + static void ethqos_fix_mac_speed(void *priv, unsigned int speed) { struct qcom_ethqos *ethqos = priv; ethqos->speed = speed; - ethqos_update_rgmii_clk(ethqos, speed); + ethqos_update_link_clk(ethqos, speed); ethqos_configure(ethqos); } +static int qcom_ethqos_serdes_powerup(struct net_device *ndev, void *priv) +{ + struct qcom_ethqos *ethqos = priv; + int ret; + + ret = phy_init(ethqos->serdes_phy); + if (ret) + return ret; + + ret = phy_power_on(ethqos->serdes_phy); + if (ret) + return ret; + + return phy_set_speed(ethqos->serdes_phy, ethqos->speed); +} + +static void qcom_ethqos_serdes_powerdown(struct net_device *ndev, void *priv) +{ + struct qcom_ethqos *ethqos = priv; + + phy_power_off(ethqos->serdes_phy); + phy_exit(ethqos->serdes_phy); +} + static int ethqos_clks_config(void *priv, bool enabled) { struct qcom_ethqos *ethqos = priv; int ret = 0; if (enabled) { - ret = clk_prepare_enable(ethqos->rgmii_clk); + ret = clk_prepare_enable(ethqos->link_clk); if (ret) { - dev_err(ðqos->pdev->dev, "rgmii_clk enable failed\n"); + dev_err(ðqos->pdev->dev, "link_clk enable failed\n"); return ret; } @@ -580,18 +683,24 @@ static int ethqos_clks_config(void *priv, bool enabled) */ ethqos_set_func_clk_en(ethqos); } else { - clk_disable_unprepare(ethqos->rgmii_clk); + clk_disable_unprepare(ethqos->link_clk); } return ret; } +static void ethqos_clks_disable(void *data) +{ + ethqos_clks_config(data, false); +} + static int qcom_ethqos_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; + const struct ethqos_emac_driver_data *data; struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; - const struct ethqos_emac_driver_data *data; + struct device *dev = &pdev->dev; struct qcom_ethqos *ethqos; int ret; @@ -599,90 +708,91 @@ static int qcom_ethqos_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) { - dev_err(&pdev->dev, "dt configuration failed\n"); + dev_err(dev, "dt configuration failed\n"); return PTR_ERR(plat_dat); } plat_dat->clks_config = ethqos_clks_config; - ethqos = devm_kzalloc(&pdev->dev, sizeof(*ethqos), GFP_KERNEL); - if (!ethqos) { - ret = -ENOMEM; - goto err_mem; + ethqos = devm_kzalloc(dev, sizeof(*ethqos), GFP_KERNEL); + if (!ethqos) + return -ENOMEM; + + ethqos->phy_mode = device_get_phy_mode(dev); + switch (ethqos->phy_mode) { + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + ethqos->configure_func = ethqos_configure_rgmii; + break; + case PHY_INTERFACE_MODE_SGMII: + ethqos->configure_func = ethqos_configure_sgmii; + break; + case -ENODEV: + return -ENODEV; + default: + return -EINVAL; } ethqos->pdev = pdev; ethqos->rgmii_base = devm_platform_ioremap_resource_byname(pdev, "rgmii"); - if (IS_ERR(ethqos->rgmii_base)) { - ret = PTR_ERR(ethqos->rgmii_base); - goto err_mem; - } + if (IS_ERR(ethqos->rgmii_base)) + return PTR_ERR(ethqos->rgmii_base); + + ethqos->mac_base = stmmac_res.addr; - data = of_device_get_match_data(&pdev->dev); + data = of_device_get_match_data(dev); ethqos->por = data->por; ethqos->num_por = data->num_por; ethqos->rgmii_config_loopback_en = data->rgmii_config_loopback_en; - ethqos->has_emac3 = data->has_emac3; + ethqos->has_emac_ge_3 = data->has_emac_ge_3; - ethqos->rgmii_clk = devm_clk_get(&pdev->dev, "rgmii"); - if (IS_ERR(ethqos->rgmii_clk)) { - ret = PTR_ERR(ethqos->rgmii_clk); - goto err_mem; - } + ethqos->link_clk = devm_clk_get(dev, data->link_clk_name ?: "rgmii"); + if (IS_ERR(ethqos->link_clk)) + return PTR_ERR(ethqos->link_clk); ret = ethqos_clks_config(ethqos, true); if (ret) - goto err_mem; + return ret; + + ret = devm_add_action_or_reset(dev, ethqos_clks_disable, ethqos); + if (ret) + return ret; + + ethqos->serdes_phy = devm_phy_optional_get(dev, "serdes"); + if (IS_ERR(ethqos->serdes_phy)) + return PTR_ERR(ethqos->serdes_phy); ethqos->speed = SPEED_1000; - ethqos_update_rgmii_clk(ethqos, SPEED_1000); + ethqos_update_link_clk(ethqos, SPEED_1000); ethqos_set_func_clk_en(ethqos); plat_dat->bsp_priv = ethqos; plat_dat->fix_mac_speed = ethqos_fix_mac_speed; plat_dat->dump_debug_regs = rgmii_dump; plat_dat->has_gmac4 = 1; - if (ethqos->has_emac3) + if (ethqos->has_emac_ge_3) plat_dat->dwmac4_addrs = &data->dwmac4_addrs; plat_dat->pmt = 1; plat_dat->tso_en = of_property_read_bool(np, "snps,tso"); if (of_device_is_compatible(np, "qcom,qcs404-ethqos")) plat_dat->rx_clk_runs_in_lpi = 1; + plat_dat->has_integrated_pcs = data->has_integrated_pcs; - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (ret) - goto err_clk; - - return ret; - -err_clk: - ethqos_clks_config(ethqos, false); - -err_mem: - stmmac_remove_config_dt(pdev, plat_dat); - - return ret; -} - -static int qcom_ethqos_remove(struct platform_device *pdev) -{ - struct qcom_ethqos *ethqos; - int ret; - - ethqos = get_stmmac_bsp_priv(&pdev->dev); - if (!ethqos) - return -ENODEV; - - ret = stmmac_pltfr_remove(pdev); - ethqos_clks_config(ethqos, false); + if (ethqos->serdes_phy) { + plat_dat->serdes_powerup = qcom_ethqos_serdes_powerup; + plat_dat->serdes_powerdown = qcom_ethqos_serdes_powerdown; + } - return ret; + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); } static const struct of_device_id qcom_ethqos_match[] = { { .compatible = "qcom,qcs404-ethqos", .data = &emac_v2_3_0_data}, + { .compatible = "qcom,sa8775p-ethqos", .data = &emac_v4_0_0_data}, { .compatible = "qcom,sc8280xp-ethqos", .data = &emac_v3_0_0_data}, { .compatible = "qcom,sm8150-ethqos", .data = &emac_v2_1_0_data}, { } @@ -691,7 +801,6 @@ MODULE_DEVICE_TABLE(of, qcom_ethqos_match); static struct platform_driver qcom_ethqos_driver = { .probe = qcom_ethqos_probe, - .remove = qcom_ethqos_remove, .driver = { .name = "qcom-ethqos", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 4ea31ccf24d0..d81591b470a2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1863,15 +1863,13 @@ err_remove_config_dt: return ret; } -static int rk_gmac_remove(struct platform_device *pdev) +static void rk_gmac_remove(struct platform_device *pdev) { struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(&pdev->dev); stmmac_dvr_remove(&pdev->dev); rk_gmac_powerdown(bsp_priv); - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -1925,7 +1923,7 @@ MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match); static struct platform_driver rk_gmac_dwmac_driver = { .probe = rk_gmac_probe, - .remove = rk_gmac_remove, + .remove_new = rk_gmac_remove, .driver = { .name = "rk_gmac-dwmac", .pm = &rk_gmac_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 6b447d8f0bd8..6267bcb60206 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -10,14 +10,14 @@ #include <linux/of_net.h> #include <linux/phy.h> #include <linux/regmap.h> +#include <linux/mdio/mdio-regmap.h> +#include <linux/pcs-lynx.h> #include <linux/reset.h> #include <linux/stmmac.h> #include "stmmac.h" #include "stmmac_platform.h" -#include "altr_tse_pcs.h" - #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2 @@ -37,6 +37,10 @@ #define EMAC_SPLITTER_CTRL_SPEED_100 0x3 #define EMAC_SPLITTER_CTRL_SPEED_1000 0x0 +#define SGMII_ADAPTER_CTRL_REG 0x00 +#define SGMII_ADAPTER_ENABLE 0x0000 +#define SGMII_ADAPTER_DISABLE 0x0001 + struct socfpga_dwmac; struct socfpga_dwmac_ops { int (*set_phy_mode)(struct socfpga_dwmac *dwmac_priv); @@ -50,16 +54,18 @@ struct socfpga_dwmac { struct reset_control *stmmac_rst; struct reset_control *stmmac_ocp_rst; void __iomem *splitter_base; + void __iomem *tse_pcs_base; + void __iomem *sgmii_adapter_base; bool f2h_ptp_ref_clk; - struct tse_pcs pcs; const struct socfpga_dwmac_ops *ops; + struct mdio_device *pcs_mdiodev; }; static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) { struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv; void __iomem *splitter_base = dwmac->splitter_base; - void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base; + void __iomem *sgmii_adapter_base = dwmac->sgmii_adapter_base; struct device *dev = dwmac->dev; struct net_device *ndev = dev_get_drvdata(dev); struct phy_device *phy_dev = ndev->phydev; @@ -89,11 +95,9 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG); } - if (phy_dev && sgmii_adapter_base) { + if (phy_dev && sgmii_adapter_base) writew(SGMII_ADAPTER_ENABLE, sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed); - } } static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev) @@ -183,11 +187,11 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * goto err_node_put; } - dwmac->pcs.sgmii_adapter_base = + dwmac->sgmii_adapter_base = devm_ioremap_resource(dev, &res_sgmii_adapter); - if (IS_ERR(dwmac->pcs.sgmii_adapter_base)) { - ret = PTR_ERR(dwmac->pcs.sgmii_adapter_base); + if (IS_ERR(dwmac->sgmii_adapter_base)) { + ret = PTR_ERR(dwmac->sgmii_adapter_base); goto err_node_put; } } @@ -205,11 +209,11 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * goto err_node_put; } - dwmac->pcs.tse_pcs_base = + dwmac->tse_pcs_base = devm_ioremap_resource(dev, &res_tse_pcs); - if (IS_ERR(dwmac->pcs.tse_pcs_base)) { - ret = PTR_ERR(dwmac->pcs.tse_pcs_base); + if (IS_ERR(dwmac->tse_pcs_base)) { + ret = PTR_ERR(dwmac->tse_pcs_base); goto err_node_put; } } @@ -235,6 +239,13 @@ static int socfpga_get_plat_phymode(struct socfpga_dwmac *dwmac) return priv->plat->interface; } +static void socfpga_sgmii_config(struct socfpga_dwmac *dwmac, bool enable) +{ + u16 val = enable ? SGMII_ADAPTER_ENABLE : SGMII_ADAPTER_DISABLE; + + writew(val, dwmac->sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); +} + static int socfpga_set_phy_mode_common(int phymode, u32 *val) { switch (phymode) { @@ -310,12 +321,8 @@ static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac) */ reset_control_deassert(dwmac->stmmac_ocp_rst); reset_control_deassert(dwmac->stmmac_rst); - if (phymode == PHY_INTERFACE_MODE_SGMII) { - if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) { - dev_err(dwmac->dev, "Unable to initialize TSE PCS"); - return -EINVAL; - } - } + if (phymode == PHY_INTERFACE_MODE_SGMII) + socfpga_sgmii_config(dwmac, true); return 0; } @@ -367,12 +374,8 @@ static int socfpga_gen10_set_phy_mode(struct socfpga_dwmac *dwmac) */ reset_control_deassert(dwmac->stmmac_ocp_rst); reset_control_deassert(dwmac->stmmac_rst); - if (phymode == PHY_INTERFACE_MODE_SGMII) { - if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) { - dev_err(dwmac->dev, "Unable to initialize TSE PCS"); - return -EINVAL; - } - } + if (phymode == PHY_INTERFACE_MODE_SGMII) + socfpga_sgmii_config(dwmac, true); return 0; } @@ -443,6 +446,48 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) if (ret) goto err_dvr_remove; + /* Create a regmap for the PCS so that it can be used by the PCS driver, + * if we have such a PCS + */ + if (dwmac->tse_pcs_base) { + struct regmap_config pcs_regmap_cfg; + struct mdio_regmap_config mrc; + struct regmap *pcs_regmap; + struct mii_bus *pcs_bus; + + memset(&pcs_regmap_cfg, 0, sizeof(pcs_regmap_cfg)); + memset(&mrc, 0, sizeof(mrc)); + + pcs_regmap_cfg.reg_bits = 16; + pcs_regmap_cfg.val_bits = 16; + pcs_regmap_cfg.reg_shift = REGMAP_UPSHIFT(1); + + pcs_regmap = devm_regmap_init_mmio(&pdev->dev, dwmac->tse_pcs_base, + &pcs_regmap_cfg); + if (IS_ERR(pcs_regmap)) { + ret = PTR_ERR(pcs_regmap); + goto err_dvr_remove; + } + + mrc.regmap = pcs_regmap; + mrc.parent = &pdev->dev; + mrc.valid_addr = 0x0; + mrc.autoscan = false; + + snprintf(mrc.name, MII_BUS_ID_SIZE, "%s-pcs-mii", ndev->name); + pcs_bus = devm_mdio_regmap_register(&pdev->dev, &mrc); + if (IS_ERR(pcs_bus)) { + ret = PTR_ERR(pcs_bus); + goto err_dvr_remove; + } + + stpriv->hw->lynx_pcs = lynx_pcs_create_mdiodev(pcs_bus, 0); + if (IS_ERR(stpriv->hw->lynx_pcs)) { + ret = PTR_ERR(stpriv->hw->lynx_pcs); + goto err_dvr_remove; + } + } + return 0; err_dvr_remove: @@ -453,6 +498,17 @@ err_remove_config_dt: return ret; } +static void socfpga_dwmac_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct stmmac_priv *priv = netdev_priv(ndev); + struct phylink_pcs *pcs = priv->hw->lynx_pcs; + + stmmac_pltfr_remove(pdev); + + lynx_pcs_destroy(pcs); +} + #ifdef CONFIG_PM_SLEEP static int socfpga_dwmac_resume(struct device *dev) { @@ -524,7 +580,7 @@ MODULE_DEVICE_TABLE(of, socfpga_dwmac_match); static struct platform_driver socfpga_dwmac_driver = { .probe = socfpga_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = socfpga_dwmac_remove, .driver = { .name = "socfpga-dwmac", .pm = &socfpga_dwmac_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c index 4f51a7889642..d3a39d2fb3a9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c @@ -156,7 +156,7 @@ MODULE_DEVICE_TABLE(of, starfive_dwmac_match); static struct platform_driver starfive_dwmac_driver = { .probe = starfive_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "starfive-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index 465ce66ef9c1..dcbb17c4f07a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -317,15 +317,13 @@ err_remove_config_dt: return ret; } -static int sti_dwmac_remove(struct platform_device *pdev) +static void sti_dwmac_remove(struct platform_device *pdev) { struct sti_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); stmmac_dvr_remove(&pdev->dev); clk_disable_unprepare(dwmac->clk); - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -365,7 +363,7 @@ MODULE_DEVICE_TABLE(of, sti_dwmac_match); static struct platform_driver sti_dwmac_driver = { .probe = sti_dwmac_probe, - .remove = sti_dwmac_remove, + .remove_new = sti_dwmac_remove, .driver = { .name = "sti-dwmac", .pm = &sti_dwmac_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 0616b3a04ff3..bdb4de59a672 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -417,7 +417,7 @@ err_remove_config_dt: return ret; } -static int stm32_dwmac_remove(struct platform_device *pdev) +static void stm32_dwmac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); @@ -431,8 +431,6 @@ static int stm32_dwmac_remove(struct platform_device *pdev) dev_pm_clear_wake_irq(&pdev->dev); device_init_wakeup(&pdev->dev, false); } - - return 0; } static int stm32mp1_suspend(struct stm32_dwmac *dwmac) @@ -528,7 +526,7 @@ MODULE_DEVICE_TABLE(of, stm32_dwmac_match); static struct platform_driver stm32_dwmac_driver = { .probe = stm32_dwmac_probe, - .remove = stm32_dwmac_remove, + .remove_new = stm32_dwmac_remove, .driver = { .name = "stm32-dwmac", .pm = &stm32_dwmac_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index c2c592ba0eb8..1e714380d125 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1294,7 +1294,7 @@ dwmac_deconfig: return ret; } -static int sun8i_dwmac_remove(struct platform_device *pdev) +static void sun8i_dwmac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); @@ -1309,8 +1309,6 @@ static int sun8i_dwmac_remove(struct platform_device *pdev) stmmac_pltfr_remove(pdev); sun8i_dwmac_unset_syscon(gmac); - - return 0; } static void sun8i_dwmac_shutdown(struct platform_device *pdev) @@ -1341,7 +1339,7 @@ MODULE_DEVICE_TABLE(of, sun8i_dwmac_match); static struct platform_driver sun8i_dwmac_driver = { .probe = sun8i_dwmac_probe, - .remove = sun8i_dwmac_remove, + .remove_new = sun8i_dwmac_remove, .shutdown = sun8i_dwmac_shutdown, .driver = { .name = "dwmac-sun8i", diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index fc3b0acc8f99..50963e91c347 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -179,7 +179,7 @@ MODULE_DEVICE_TABLE(of, sun7i_dwmac_match); static struct platform_driver sun7i_dwmac_driver = { .probe = sun7i_gmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "sun7i-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c index bdf990cf2f31..fbb0ccf84afc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c @@ -234,7 +234,8 @@ static int tegra_mgbe_probe(struct platform_device *pdev) res.addr = mgbe->regs; res.irq = irq; - mgbe->clks = devm_kzalloc(&pdev->dev, sizeof(*mgbe->clks), GFP_KERNEL); + mgbe->clks = devm_kcalloc(&pdev->dev, ARRAY_SIZE(mgbe_clks), + sizeof(*mgbe->clks), GFP_KERNEL); if (!mgbe->clks) return -ENOMEM; @@ -353,15 +354,13 @@ disable_clks: return err; } -static int tegra_mgbe_remove(struct platform_device *pdev) +static void tegra_mgbe_remove(struct platform_device *pdev) { struct tegra_mgbe *mgbe = get_stmmac_bsp_priv(&pdev->dev); clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks); stmmac_pltfr_remove(pdev); - - return 0; } static const struct of_device_id tegra_mgbe_match[] = { @@ -374,7 +373,7 @@ static SIMPLE_DEV_PM_OPS(tegra_mgbe_pm_ops, tegra_mgbe_suspend, tegra_mgbe_resum static struct platform_driver tegra_mgbe_driver = { .probe = tegra_mgbe_probe, - .remove = tegra_mgbe_remove, + .remove_new = tegra_mgbe_remove, .driver = { .name = "tegra-mgbe", .pm = &tegra_mgbe_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index c3f10a92b62b..acbb284be174 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -198,7 +198,7 @@ static int visconti_eth_clock_probe(struct platform_device *pdev, return 0; } -static int visconti_eth_clock_remove(struct platform_device *pdev) +static void visconti_eth_clock_remove(struct platform_device *pdev) { struct visconti_eth *dwmac = get_stmmac_bsp_priv(&pdev->dev); struct net_device *ndev = platform_get_drvdata(pdev); @@ -206,8 +206,6 @@ static int visconti_eth_clock_remove(struct platform_device *pdev) clk_disable_unprepare(dwmac->phy_ref_clk); clk_disable_unprepare(priv->plat->stmmac_clk); - - return 0; } static int visconti_eth_dwmac_probe(struct platform_device *pdev) @@ -259,23 +257,16 @@ remove_config: return ret; } -static int visconti_eth_dwmac_remove(struct platform_device *pdev) +static void visconti_eth_dwmac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); - int err; - err = stmmac_pltfr_remove(pdev); - if (err < 0) - dev_err(&pdev->dev, "failed to remove platform: %d\n", err); + stmmac_pltfr_remove(pdev); - err = visconti_eth_clock_remove(pdev); - if (err < 0) - dev_err(&pdev->dev, "failed to remove clock: %d\n", err); + visconti_eth_clock_remove(pdev); stmmac_remove_config_dt(pdev, priv->plat); - - return err; } static const struct of_device_id visconti_eth_dwmac_match[] = { @@ -286,7 +277,7 @@ MODULE_DEVICE_TABLE(of, visconti_eth_dwmac_match); static struct platform_driver visconti_eth_dwmac_driver = { .probe = visconti_eth_dwmac_probe, - .remove = visconti_eth_dwmac_remove, + .remove_new = visconti_eth_dwmac_remove, .driver = { .name = "visconti-eth-dwmac", .of_match_table = visconti_eth_dwmac_match, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index df41eac54058..03ceb6a94073 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -240,13 +240,15 @@ void stmmac_dwmac4_set_mac_addr(void __iomem *ioaddr, const u8 addr[6], void stmmac_dwmac4_set_mac(void __iomem *ioaddr, bool enable) { u32 value = readl(ioaddr + GMAC_CONFIG); + u32 old_val = value; if (enable) value |= GMAC_CONFIG_RE | GMAC_CONFIG_TE; else value &= ~(GMAC_CONFIG_TE | GMAC_CONFIG_RE); - writel(value, ioaddr + GMAC_CONFIG); + if (value != old_val) + writel(value, ioaddr + GMAC_CONFIG); } void stmmac_dwmac4_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index dfd53264e036..070bd912580b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -368,10 +368,12 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, if (likely(intr_status & XGMAC_RI)) { x->rx_normal_irq_n++; + x->rxq_stats[chan].rx_normal_irq_n++; ret |= handle_rx; } if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { x->tx_normal_irq_n++; + x->txq_stats[chan].tx_normal_irq_n++; ret |= handle_tx; } } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 87510951f4e8..6931973028ae 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -937,10 +937,13 @@ static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config, { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); - if (!priv->hw->xpcs) - return NULL; + if (priv->hw->xpcs) + return &priv->hw->xpcs->pcs; + + if (priv->hw->lynx_pcs) + return priv->hw->lynx_pcs; - return &priv->hw->xpcs->pcs; + return NULL; } static void stmmac_mac_config(struct phylink_config *config, unsigned int mode, @@ -2700,9 +2703,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) /* We still have pending packets, let's call for a new scheduling */ if (tx_q->dirty_tx != tx_q->cur_tx) - hrtimer_start(&tx_q->txtimer, - STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]), - HRTIMER_MODE_REL); + stmmac_tx_timer_arm(priv, queue); __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue)); @@ -2983,9 +2984,13 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue) { struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; + u32 tx_coal_timer = priv->tx_coal_timer[queue]; + + if (!tx_coal_timer) + return; hrtimer_start(&tx_q->txtimer, - STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]), + STMMAC_COAL_TIMER(tx_coal_timer), HRTIMER_MODE_REL); } @@ -3813,7 +3818,8 @@ static int __stmmac_open(struct net_device *dev, if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI && (!priv->hw->xpcs || - xpcs_get_an_mode(priv->hw->xpcs, mode) != DW_AN_C73)) { + xpcs_get_an_mode(priv->hw->xpcs, mode) != DW_AN_C73) && + !priv->hw->lynx_pcs) { ret = stmmac_init_phy(dev); if (ret) { netdev_err(priv->dev, @@ -5794,7 +5800,7 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv) } /* PCS link status */ - if (priv->hw->pcs) { + if (priv->hw->pcs && !priv->plat->has_integrated_pcs) { if (priv->xstats.pcs_link) netif_carrier_on(priv->dev); else @@ -7457,12 +7463,6 @@ void stmmac_dvr_remove(struct device *dev) netif_carrier_off(ndev); unregister_netdev(ndev); - /* Serdes power down needs to happen after VLAN filter - * is deleted that is triggered by unregister_netdev(). - */ - if (priv->plat->serdes_powerdown) - priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv); - #ifdef CONFIG_DEBUG_FS stmmac_exit_fs(ndev); #endif diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 6807c4c1a0a2..3db1cb0fd160 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -491,7 +491,6 @@ int stmmac_mdio_reset(struct mii_bus *bus) int stmmac_xpcs_setup(struct mii_bus *bus) { struct net_device *ndev = bus->priv; - struct mdio_device *mdiodev; struct stmmac_priv *priv; struct dw_xpcs *xpcs; int mode, addr; @@ -501,16 +500,10 @@ int stmmac_xpcs_setup(struct mii_bus *bus) /* Try to probe the XPCS by scanning all addresses. */ for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - mdiodev = mdio_device_create(bus, addr); - if (IS_ERR(mdiodev)) + xpcs = xpcs_create_mdiodev(bus, addr, mode); + if (IS_ERR(xpcs)) continue; - xpcs = xpcs_create(mdiodev, mode); - if (IS_ERR_OR_NULL(xpcs)) { - mdio_device_free(mdiodev); - continue; - } - priv->hw->xpcs = xpcs; break; } @@ -669,10 +662,8 @@ int stmmac_mdio_unregister(struct net_device *ndev) if (!priv->mii) return 0; - if (priv->hw->xpcs) { - mdio_device_free(priv->hw->xpcs->mdiodev); + if (priv->hw->xpcs) xpcs_destroy(priv->hw->xpcs); - } mdiobus_unregister(priv->mii); priv->mii->priv = NULL; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index eb0b2898daa3..231152ee5a32 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -8,6 +8,7 @@ Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> *******************************************************************************/ +#include <linux/device.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> #include <linux/module.h> @@ -629,6 +630,39 @@ error_pclk_get: return ret; } +static void devm_stmmac_remove_config_dt(void *data) +{ + struct plat_stmmacenet_data *plat = data; + + /* Platform data argument is unused */ + stmmac_remove_config_dt(NULL, plat); +} + +/** + * devm_stmmac_probe_config_dt + * @pdev: platform_device structure + * @mac: MAC address to use + * Description: Devres variant of stmmac_probe_config_dt(). Does not require + * the user to call stmmac_remove_config_dt() at driver detach. + */ +struct plat_stmmacenet_data * +devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) +{ + struct plat_stmmacenet_data *plat; + int ret; + + plat = stmmac_probe_config_dt(pdev, mac); + if (IS_ERR(plat)) + return plat; + + ret = devm_add_action_or_reset(&pdev->dev, + devm_stmmac_remove_config_dt, plat); + if (ret) + return ERR_PTR(ret); + + return plat; +} + /** * stmmac_remove_config_dt - undo the effects of stmmac_probe_config_dt() * @pdev: platform_device structure @@ -651,12 +685,19 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) return ERR_PTR(-EINVAL); } +struct plat_stmmacenet_data * +devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) +{ + return ERR_PTR(-EINVAL); +} + void stmmac_remove_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat) { } #endif /* CONFIG_OF */ EXPORT_SYMBOL_GPL(stmmac_probe_config_dt); +EXPORT_SYMBOL_GPL(devm_stmmac_probe_config_dt); EXPORT_SYMBOL_GPL(stmmac_remove_config_dt); int stmmac_get_platform_resources(struct platform_device *pdev, @@ -702,25 +743,127 @@ int stmmac_get_platform_resources(struct platform_device *pdev, EXPORT_SYMBOL_GPL(stmmac_get_platform_resources); /** - * stmmac_pltfr_remove + * stmmac_pltfr_init + * @pdev: pointer to the platform device + * @plat: driver data platform structure + * Description: Call the platform's init callback (if any) and propagate + * the return value. + */ +int stmmac_pltfr_init(struct platform_device *pdev, + struct plat_stmmacenet_data *plat) +{ + int ret = 0; + + if (plat->init) + ret = plat->init(pdev, plat->bsp_priv); + + return ret; +} +EXPORT_SYMBOL_GPL(stmmac_pltfr_init); + +/** + * stmmac_pltfr_exit + * @pdev: pointer to the platform device + * @plat: driver data platform structure + * Description: Call the platform's exit callback (if any). + */ +void stmmac_pltfr_exit(struct platform_device *pdev, + struct plat_stmmacenet_data *plat) +{ + if (plat->exit) + plat->exit(pdev, plat->bsp_priv); +} +EXPORT_SYMBOL_GPL(stmmac_pltfr_exit); + +/** + * stmmac_pltfr_probe * @pdev: platform device pointer - * Description: this function calls the main to free the net resources - * and calls the platforms hook and release the resources (e.g. mem). + * @plat: driver data platform structure + * @res: stmmac resources structure + * Description: This calls the platform's init() callback and probes the + * stmmac driver. */ -int stmmac_pltfr_remove(struct platform_device *pdev) +int stmmac_pltfr_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + int ret; + + ret = stmmac_pltfr_init(pdev, plat); + if (ret) + return ret; + + ret = stmmac_dvr_probe(&pdev->dev, plat, res); + if (ret) { + stmmac_pltfr_exit(pdev, plat); + return ret; + } + + return ret; +} +EXPORT_SYMBOL_GPL(stmmac_pltfr_probe); + +static void devm_stmmac_pltfr_remove(void *data) +{ + struct platform_device *pdev = data; + + stmmac_pltfr_remove_no_dt(pdev); +} + +/** + * devm_stmmac_pltfr_probe + * @pdev: pointer to the platform device + * @plat: driver data platform structure + * @res: stmmac resources + * Description: Devres variant of stmmac_pltfr_probe(). Allows users to skip + * calling stmmac_pltfr_remove() on driver detach. + */ +int devm_stmmac_pltfr_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + int ret; + + ret = stmmac_pltfr_probe(pdev, plat, res); + if (ret) + return ret; + + return devm_add_action_or_reset(&pdev->dev, devm_stmmac_pltfr_remove, + pdev); +} +EXPORT_SYMBOL_GPL(devm_stmmac_pltfr_probe); + +/** + * stmmac_pltfr_remove_no_dt + * @pdev: pointer to the platform device + * Description: This undoes the effects of stmmac_pltfr_probe() by removing the + * driver and calling the platform's exit() callback. + */ +void stmmac_pltfr_remove_no_dt(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); struct plat_stmmacenet_data *plat = priv->plat; stmmac_dvr_remove(&pdev->dev); + stmmac_pltfr_exit(pdev, plat); +} +EXPORT_SYMBOL_GPL(stmmac_pltfr_remove_no_dt); - if (plat->exit) - plat->exit(pdev, plat->bsp_priv); +/** + * stmmac_pltfr_remove + * @pdev: platform device pointer + * Description: this function calls the main to free the net resources + * and calls the platforms hook and release the resources (e.g. mem). + */ +void stmmac_pltfr_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct stmmac_priv *priv = netdev_priv(ndev); + struct plat_stmmacenet_data *plat = priv->plat; + stmmac_pltfr_remove_no_dt(pdev); stmmac_remove_config_dt(pdev, plat); - - return 0; } EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); @@ -739,8 +882,7 @@ static int __maybe_unused stmmac_pltfr_suspend(struct device *dev) struct platform_device *pdev = to_platform_device(dev); ret = stmmac_suspend(dev); - if (priv->plat->exit) - priv->plat->exit(pdev, priv->plat->bsp_priv); + stmmac_pltfr_exit(pdev, priv->plat); return ret; } @@ -757,9 +899,11 @@ static int __maybe_unused stmmac_pltfr_resume(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); struct platform_device *pdev = to_platform_device(dev); + int ret; - if (priv->plat->init) - priv->plat->init(pdev, priv->plat->bsp_priv); + ret = stmmac_pltfr_init(pdev, priv->plat->bsp_priv); + if (ret) + return ret; return stmmac_resume(dev); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index 3fff3f59d73d..c5565b2a70ac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ -13,13 +13,27 @@ struct plat_stmmacenet_data * stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac); +struct plat_stmmacenet_data * +devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac); void stmmac_remove_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat); int stmmac_get_platform_resources(struct platform_device *pdev, struct stmmac_resources *stmmac_res); -int stmmac_pltfr_remove(struct platform_device *pdev); +int stmmac_pltfr_init(struct platform_device *pdev, + struct plat_stmmacenet_data *plat); +void stmmac_pltfr_exit(struct platform_device *pdev, + struct plat_stmmacenet_data *plat); + +int stmmac_pltfr_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res); +int devm_stmmac_pltfr_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res); +void stmmac_pltfr_remove_no_dt(struct platform_device *pdev); +void stmmac_pltfr_remove(struct platform_device *pdev); extern const struct dev_pm_ops stmmac_pltfr_pm_ops; static inline void *get_stmmac_bsp_priv(struct device *dev) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 9d55226479b4..ac41ef4cbd2f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -966,8 +966,11 @@ static int tc_setup_taprio(struct stmmac_priv *priv, return -EOPNOTSUPP; } - if (!qopt->enable) + if (qopt->cmd == TAPRIO_CMD_DESTROY) goto disable; + else if (qopt->cmd != TAPRIO_CMD_REPLACE) + return -EOPNOTSUPP; + if (qopt->num_entries >= dep) return -EINVAL; if (!qopt->cycle_time) @@ -988,7 +991,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, mutex_lock(&priv->plat->est->lock); priv->plat->est->gcl_size = size; - priv->plat->est->enable = qopt->enable; + priv->plat->est->enable = qopt->cmd == TAPRIO_CMD_REPLACE; mutex_unlock(&priv->plat->est->lock); for (i = 0; i < size; i++) { diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index d61dfa250feb..b317b9486455 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -1998,10 +1998,8 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, skb->truesize += hlen - swivel; skb->len += hlen - swivel; - __skb_frag_set_page(frag, page->buffer); + skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel); __skb_frag_ref(frag); - skb_frag_off_set(frag, off); - skb_frag_size_set(frag, hlen - swivel); /* any more data? */ if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) { @@ -2024,10 +2022,8 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, skb->len += hlen; frag++; - __skb_frag_set_page(frag, page->buffer); + skb_frag_fill_page_desc(frag, page->buffer, 0, hlen); __skb_frag_ref(frag); - skb_frag_off_set(frag, 0); - skb_frag_size_set(frag, hlen); RX_USED_ADD(page, hlen + cp->crc_size); } diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index a6211b95ed17..3525d5c0d694 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -25,6 +25,7 @@ #endif #include <net/ip.h> +#include <net/gso.h> #include <net/icmp.h> #include <net/route.h> diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c index 3a908db6e5b2..eced87fa261c 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.c +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c @@ -450,7 +450,7 @@ static int am65_cpsw_configure_taprio(struct net_device *ndev, am65_cpsw_est_update_state(ndev); - if (!est_new->taprio.enable) { + if (est_new->taprio.cmd == TAPRIO_CMD_DESTROY) { am65_cpsw_stop_est(ndev); return ret; } @@ -476,7 +476,7 @@ static int am65_cpsw_configure_taprio(struct net_device *ndev, am65_cpsw_est_set_sched_list(ndev, est_new); am65_cpsw_port_est_assign_buf_num(ndev, est_new->buf); - am65_cpsw_est_set(ndev, est_new->taprio.enable); + am65_cpsw_est_set(ndev, est_new->taprio.cmd == TAPRIO_CMD_REPLACE); if (tact == TACT_PROG) { ret = am65_cpsw_timer_set(ndev, est_new); @@ -520,7 +520,7 @@ static int am65_cpsw_set_taprio(struct net_device *ndev, void *type_data) am65_cpsw_cp_taprio(taprio, &est_new->taprio); ret = am65_cpsw_configure_taprio(ndev, est_new); if (!ret) { - if (taprio->enable) { + if (taprio->cmd == TAPRIO_CMD_REPLACE) { devm_kfree(&ndev->dev, port->qos.est_admin); port->qos.est_admin = est_new; @@ -564,8 +564,13 @@ purge_est: static int am65_cpsw_setup_taprio(struct net_device *ndev, void *type_data) { struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct tc_taprio_qopt_offload *taprio = type_data; struct am65_cpsw_common *common = port->common; + if (taprio->cmd != TAPRIO_CMD_REPLACE && + taprio->cmd != TAPRIO_CMD_DESTROY) + return -EOPNOTSUPP; + if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_TAS)) return -ENODEV; diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 0c5e783e574c..64bf22cd860c 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -106,23 +106,37 @@ struct cpsw_ale_dev_id { static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits) { - int idx; + int idx, idx2; + u32 hi_val = 0; idx = start / 32; + idx2 = (start + bits - 1) / 32; + /* Check if bits to be fetched exceed a word */ + if (idx != idx2) { + idx2 = 2 - idx2; /* flip */ + hi_val = ale_entry[idx2] << ((idx2 * 32) - start); + } start -= idx * 32; idx = 2 - idx; /* flip */ - return (ale_entry[idx] >> start) & BITMASK(bits); + return (hi_val + (ale_entry[idx] >> start)) & BITMASK(bits); } static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits, u32 value) { - int idx; + int idx, idx2; value &= BITMASK(bits); - idx = start / 32; + idx = start / 32; + idx2 = (start + bits - 1) / 32; + /* Check if bits to be set exceed a word */ + if (idx != idx2) { + idx2 = 2 - idx2; /* flip */ + ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32))); + ale_entry[idx2] |= (value >> ((idx2 * 32) - start)); + } start -= idx * 32; - idx = 2 - idx; /* flip */ + idx = 2 - idx; /* flip */ ale_entry[idx] &= ~(BITMASK(bits) << start); ale_entry[idx] |= (value << start); } diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig index c9d88673d306..39596cd13539 100644 --- a/drivers/net/ethernet/wangxun/Kconfig +++ b/drivers/net/ethernet/wangxun/Kconfig @@ -40,6 +40,16 @@ config NGBE config TXGBE tristate "Wangxun(R) 10GbE PCI Express adapters support" depends on PCI + depends on COMMON_CLK + select REGMAP + select I2C + select I2C_DESIGNWARE_PLATFORM + select PHYLINK + select HWMON if TXGBE=y + select SFP + select GPIOLIB + select GPIOLIB_IRQCHIP + select PCS_XPCS select LIBWX help This driver supports Wangxun(R) 10GbE PCI Express family of diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index ca409b4054d0..6321178fc814 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1182,12 +1182,28 @@ static void wx_enable_sec_rx_path(struct wx *wx) WX_WRITE_FLUSH(wx); } +static void wx_vlan_strip_control(struct wx *wx, bool enable) +{ + int i, j; + + for (i = 0; i < wx->num_rx_queues; i++) { + struct wx_ring *ring = wx->rx_ring[i]; + + j = ring->reg_idx; + wr32m(wx, WX_PX_RR_CFG(j), WX_PX_RR_CFG_VLAN, + enable ? WX_PX_RR_CFG_VLAN : 0); + } +} + void wx_set_rx_mode(struct net_device *netdev) { struct wx *wx = netdev_priv(netdev); + netdev_features_t features; u32 fctrl, vmolr, vlnctrl; int count; + features = netdev->features; + /* Check for Promiscuous and All Multicast modes */ fctrl = rd32(wx, WX_PSR_CTL); fctrl &= ~(WX_PSR_CTL_UPE | WX_PSR_CTL_MPE); @@ -1254,6 +1270,13 @@ void wx_set_rx_mode(struct net_device *netdev) wr32(wx, WX_PSR_VLAN_CTL, vlnctrl); wr32(wx, WX_PSR_CTL, fctrl); wr32(wx, WX_PSR_VM_L2CTL(0), vmolr); + + if ((features & NETIF_F_HW_VLAN_CTAG_RX) && + (features & NETIF_F_HW_VLAN_STAG_RX)) + wx_vlan_strip_control(wx, true); + else + wx_vlan_strip_control(wx, false); + } EXPORT_SYMBOL(wx_set_rx_mode); @@ -1462,6 +1485,16 @@ static void wx_configure_tx(struct wx *wx) WX_MAC_TX_CFG_TE, WX_MAC_TX_CFG_TE); } +static void wx_restore_vlan(struct wx *wx) +{ + u16 vid = 1; + + wx_vlan_rx_add_vid(wx->netdev, htons(ETH_P_8021Q), 0); + + for_each_set_bit_from(vid, wx->active_vlans, VLAN_N_VID) + wx_vlan_rx_add_vid(wx->netdev, htons(ETH_P_8021Q), vid); +} + /** * wx_configure_rx - Configure Receive Unit after Reset * @wx: pointer to private structure @@ -1478,7 +1511,6 @@ static void wx_configure_rx(struct wx *wx) psrtype = WX_RDB_PL_CFG_L4HDR | WX_RDB_PL_CFG_L3HDR | WX_RDB_PL_CFG_L2HDR | - WX_RDB_PL_CFG_TUN_TUNHDR | WX_RDB_PL_CFG_TUN_TUNHDR; wr32(wx, WX_RDB_PL_CFG(0), psrtype); @@ -1527,7 +1559,7 @@ void wx_configure(struct wx *wx) wx_configure_port(wx); wx_set_rx_mode(wx->netdev); - + wx_restore_vlan(wx); wx_enable_sec_rx_path(wx); wx_configure_tx(wx); @@ -1727,4 +1759,241 @@ int wx_sw_init(struct wx *wx) } EXPORT_SYMBOL(wx_sw_init); +/** + * wx_find_vlvf_slot - find the vlanid or the first empty slot + * @wx: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * + * return the VLVF index where this VLAN id should be placed + * + **/ +static int wx_find_vlvf_slot(struct wx *wx, u32 vlan) +{ + u32 bits = 0, first_empty_slot = 0; + int regindex; + + /* short cut the special case */ + if (vlan == 0) + return 0; + + /* Search for the vlan id in the VLVF entries. Save off the first empty + * slot found along the way + */ + for (regindex = 1; regindex < WX_PSR_VLAN_SWC_ENTRIES; regindex++) { + wr32(wx, WX_PSR_VLAN_SWC_IDX, regindex); + bits = rd32(wx, WX_PSR_VLAN_SWC); + if (!bits && !(first_empty_slot)) + first_empty_slot = regindex; + else if ((bits & 0x0FFF) == vlan) + break; + } + + if (regindex >= WX_PSR_VLAN_SWC_ENTRIES) { + if (first_empty_slot) + regindex = first_empty_slot; + else + regindex = -ENOMEM; + } + + return regindex; +} + +/** + * wx_set_vlvf - Set VLAN Pool Filter + * @wx: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFVFB + * @vlan_on: boolean flag to turn on/off VLAN in VFVF + * @vfta_changed: pointer to boolean flag which indicates whether VFTA + * should be changed + * + * Turn on/off specified bit in VLVF table. + **/ +static int wx_set_vlvf(struct wx *wx, u32 vlan, u32 vind, bool vlan_on, + bool *vfta_changed) +{ + int vlvf_index; + u32 vt, bits; + + /* If VT Mode is set + * Either vlan_on + * make sure the vlan is in VLVF + * set the vind bit in the matching VLVFB + * Or !vlan_on + * clear the pool bit and possibly the vind + */ + vt = rd32(wx, WX_CFG_PORT_CTL); + if (!(vt & WX_CFG_PORT_CTL_NUM_VT_MASK)) + return 0; + + vlvf_index = wx_find_vlvf_slot(wx, vlan); + if (vlvf_index < 0) + return vlvf_index; + + wr32(wx, WX_PSR_VLAN_SWC_IDX, vlvf_index); + if (vlan_on) { + /* set the pool bit */ + if (vind < 32) { + bits = rd32(wx, WX_PSR_VLAN_SWC_VM_L); + bits |= (1 << vind); + wr32(wx, WX_PSR_VLAN_SWC_VM_L, bits); + } else { + bits = rd32(wx, WX_PSR_VLAN_SWC_VM_H); + bits |= (1 << (vind - 32)); + wr32(wx, WX_PSR_VLAN_SWC_VM_H, bits); + } + } else { + /* clear the pool bit */ + if (vind < 32) { + bits = rd32(wx, WX_PSR_VLAN_SWC_VM_L); + bits &= ~(1 << vind); + wr32(wx, WX_PSR_VLAN_SWC_VM_L, bits); + bits |= rd32(wx, WX_PSR_VLAN_SWC_VM_H); + } else { + bits = rd32(wx, WX_PSR_VLAN_SWC_VM_H); + bits &= ~(1 << (vind - 32)); + wr32(wx, WX_PSR_VLAN_SWC_VM_H, bits); + bits |= rd32(wx, WX_PSR_VLAN_SWC_VM_L); + } + } + + if (bits) { + wr32(wx, WX_PSR_VLAN_SWC, (WX_PSR_VLAN_SWC_VIEN | vlan)); + if (!vlan_on && vfta_changed) + *vfta_changed = false; + } else { + wr32(wx, WX_PSR_VLAN_SWC, 0); + } + + return 0; +} + +/** + * wx_set_vfta - Set VLAN filter table + * @wx: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFVFB + * @vlan_on: boolean flag to turn on/off VLAN in VFVF + * + * Turn on/off specified VLAN in the VLAN filter table. + **/ +static int wx_set_vfta(struct wx *wx, u32 vlan, u32 vind, bool vlan_on) +{ + u32 bitindex, vfta, targetbit; + bool vfta_changed = false; + int regindex, ret; + + /* this is a 2 part operation - first the VFTA, then the + * VLVF and VLVFB if VT Mode is set + * We don't write the VFTA until we know the VLVF part succeeded. + */ + + /* Part 1 + * The VFTA is a bitstring made up of 128 32-bit registers + * that enable the particular VLAN id, much like the MTA: + * bits[11-5]: which register + * bits[4-0]: which bit in the register + */ + regindex = (vlan >> 5) & 0x7F; + bitindex = vlan & 0x1F; + targetbit = (1 << bitindex); + /* errata 5 */ + vfta = wx->mac.vft_shadow[regindex]; + if (vlan_on) { + if (!(vfta & targetbit)) { + vfta |= targetbit; + vfta_changed = true; + } + } else { + if ((vfta & targetbit)) { + vfta &= ~targetbit; + vfta_changed = true; + } + } + /* Part 2 + * Call wx_set_vlvf to set VLVFB and VLVF + */ + ret = wx_set_vlvf(wx, vlan, vind, vlan_on, &vfta_changed); + if (ret != 0) + return ret; + + if (vfta_changed) + wr32(wx, WX_PSR_VLAN_TBL(regindex), vfta); + wx->mac.vft_shadow[regindex] = vfta; + + return 0; +} + +/** + * wx_clear_vfta - Clear VLAN filter table + * @wx: pointer to hardware structure + * + * Clears the VLAN filer table, and the VMDq index associated with the filter + **/ +static void wx_clear_vfta(struct wx *wx) +{ + u32 offset; + + for (offset = 0; offset < wx->mac.vft_size; offset++) { + wr32(wx, WX_PSR_VLAN_TBL(offset), 0); + wx->mac.vft_shadow[offset] = 0; + } + + for (offset = 0; offset < WX_PSR_VLAN_SWC_ENTRIES; offset++) { + wr32(wx, WX_PSR_VLAN_SWC_IDX, offset); + wr32(wx, WX_PSR_VLAN_SWC, 0); + wr32(wx, WX_PSR_VLAN_SWC_VM_L, 0); + wr32(wx, WX_PSR_VLAN_SWC_VM_H, 0); + } +} + +int wx_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) +{ + struct wx *wx = netdev_priv(netdev); + + /* add VID to filter table */ + wx_set_vfta(wx, vid, VMDQ_P(0), true); + set_bit(vid, wx->active_vlans); + + return 0; +} +EXPORT_SYMBOL(wx_vlan_rx_add_vid); + +int wx_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) +{ + struct wx *wx = netdev_priv(netdev); + + /* remove VID from filter table */ + if (vid) + wx_set_vfta(wx, vid, VMDQ_P(0), false); + clear_bit(vid, wx->active_vlans); + + return 0; +} +EXPORT_SYMBOL(wx_vlan_rx_kill_vid); + +/** + * wx_start_hw - Prepare hardware for Tx/Rx + * @wx: pointer to hardware structure + * + * Starts the hardware using the generic start_hw function + * and the generation start_hw function. + * Then performs revision-specific operations, if any. + **/ +void wx_start_hw(struct wx *wx) +{ + int i; + + /* Clear the VLAN filter table */ + wx_clear_vfta(wx); + WX_WRITE_FLUSH(wx); + /* Clear the rate limiters */ + for (i = 0; i < wx->mac.max_tx_queues; i++) { + wr32(wx, WX_TDM_RP_IDX, i); + wr32(wx, WX_TDM_RP_RATE, 0); + } +} +EXPORT_SYMBOL(wx_start_hw); + MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h index c173c56f0ab5..1f93ca32c921 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h @@ -26,10 +26,13 @@ void wx_set_rx_mode(struct net_device *netdev); int wx_change_mtu(struct net_device *netdev, int new_mtu); void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring); void wx_configure(struct wx *wx); +void wx_start_hw(struct wx *wx); int wx_disable_pcie_master(struct wx *wx); int wx_stop_adapter(struct wx *wx); void wx_reset_misc(struct wx *wx); int wx_get_pcie_msix_counts(struct wx *wx, u16 *msix_count, u16 max_msix_count); int wx_sw_init(struct wx *wx); +int wx_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid); +int wx_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid); #endif /* _WX_HW_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 1e8d8b7b0c62..2c3f08be8c37 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -2,14 +2,157 @@ /* Copyright (c) 2019 - 2022 Beijing WangXun Technology Co., Ltd. */ #include <linux/etherdevice.h> +#include <net/ip6_checksum.h> #include <net/page_pool.h> +#include <net/inet_ecn.h> #include <linux/iopoll.h> +#include <linux/sctp.h> #include <linux/pci.h> +#include <net/tcp.h> +#include <net/ip.h> #include "wx_type.h" #include "wx_lib.h" #include "wx_hw.h" +/* Lookup table mapping the HW PTYPE to the bit field for decoding */ +static struct wx_dec_ptype wx_ptype_lookup[256] = { + /* L2: mac */ + [0x11] = WX_PTT(L2, NONE, NONE, NONE, NONE, PAY2), + [0x12] = WX_PTT(L2, NONE, NONE, NONE, TS, PAY2), + [0x13] = WX_PTT(L2, NONE, NONE, NONE, NONE, PAY2), + [0x14] = WX_PTT(L2, NONE, NONE, NONE, NONE, PAY2), + [0x15] = WX_PTT(L2, NONE, NONE, NONE, NONE, NONE), + [0x16] = WX_PTT(L2, NONE, NONE, NONE, NONE, PAY2), + [0x17] = WX_PTT(L2, NONE, NONE, NONE, NONE, NONE), + + /* L2: ethertype filter */ + [0x18 ... 0x1F] = WX_PTT(L2, NONE, NONE, NONE, NONE, NONE), + + /* L3: ip non-tunnel */ + [0x21] = WX_PTT(IP, FGV4, NONE, NONE, NONE, PAY3), + [0x22] = WX_PTT(IP, IPV4, NONE, NONE, NONE, PAY3), + [0x23] = WX_PTT(IP, IPV4, NONE, NONE, UDP, PAY4), + [0x24] = WX_PTT(IP, IPV4, NONE, NONE, TCP, PAY4), + [0x25] = WX_PTT(IP, IPV4, NONE, NONE, SCTP, PAY4), + [0x29] = WX_PTT(IP, FGV6, NONE, NONE, NONE, PAY3), + [0x2A] = WX_PTT(IP, IPV6, NONE, NONE, NONE, PAY3), + [0x2B] = WX_PTT(IP, IPV6, NONE, NONE, UDP, PAY3), + [0x2C] = WX_PTT(IP, IPV6, NONE, NONE, TCP, PAY4), + [0x2D] = WX_PTT(IP, IPV6, NONE, NONE, SCTP, PAY4), + + /* L2: fcoe */ + [0x30 ... 0x34] = WX_PTT(FCOE, NONE, NONE, NONE, NONE, PAY3), + [0x38 ... 0x3C] = WX_PTT(FCOE, NONE, NONE, NONE, NONE, PAY3), + + /* IPv4 --> IPv4/IPv6 */ + [0x81] = WX_PTT(IP, IPV4, IPIP, FGV4, NONE, PAY3), + [0x82] = WX_PTT(IP, IPV4, IPIP, IPV4, NONE, PAY3), + [0x83] = WX_PTT(IP, IPV4, IPIP, IPV4, UDP, PAY4), + [0x84] = WX_PTT(IP, IPV4, IPIP, IPV4, TCP, PAY4), + [0x85] = WX_PTT(IP, IPV4, IPIP, IPV4, SCTP, PAY4), + [0x89] = WX_PTT(IP, IPV4, IPIP, FGV6, NONE, PAY3), + [0x8A] = WX_PTT(IP, IPV4, IPIP, IPV6, NONE, PAY3), + [0x8B] = WX_PTT(IP, IPV4, IPIP, IPV6, UDP, PAY4), + [0x8C] = WX_PTT(IP, IPV4, IPIP, IPV6, TCP, PAY4), + [0x8D] = WX_PTT(IP, IPV4, IPIP, IPV6, SCTP, PAY4), + + /* IPv4 --> GRE/NAT --> NONE/IPv4/IPv6 */ + [0x90] = WX_PTT(IP, IPV4, IG, NONE, NONE, PAY3), + [0x91] = WX_PTT(IP, IPV4, IG, FGV4, NONE, PAY3), + [0x92] = WX_PTT(IP, IPV4, IG, IPV4, NONE, PAY3), + [0x93] = WX_PTT(IP, IPV4, IG, IPV4, UDP, PAY4), + [0x94] = WX_PTT(IP, IPV4, IG, IPV4, TCP, PAY4), + [0x95] = WX_PTT(IP, IPV4, IG, IPV4, SCTP, PAY4), + [0x99] = WX_PTT(IP, IPV4, IG, FGV6, NONE, PAY3), + [0x9A] = WX_PTT(IP, IPV4, IG, IPV6, NONE, PAY3), + [0x9B] = WX_PTT(IP, IPV4, IG, IPV6, UDP, PAY4), + [0x9C] = WX_PTT(IP, IPV4, IG, IPV6, TCP, PAY4), + [0x9D] = WX_PTT(IP, IPV4, IG, IPV6, SCTP, PAY4), + + /* IPv4 --> GRE/NAT --> MAC --> NONE/IPv4/IPv6 */ + [0xA0] = WX_PTT(IP, IPV4, IGM, NONE, NONE, PAY3), + [0xA1] = WX_PTT(IP, IPV4, IGM, FGV4, NONE, PAY3), + [0xA2] = WX_PTT(IP, IPV4, IGM, IPV4, NONE, PAY3), + [0xA3] = WX_PTT(IP, IPV4, IGM, IPV4, UDP, PAY4), + [0xA4] = WX_PTT(IP, IPV4, IGM, IPV4, TCP, PAY4), + [0xA5] = WX_PTT(IP, IPV4, IGM, IPV4, SCTP, PAY4), + [0xA9] = WX_PTT(IP, IPV4, IGM, FGV6, NONE, PAY3), + [0xAA] = WX_PTT(IP, IPV4, IGM, IPV6, NONE, PAY3), + [0xAB] = WX_PTT(IP, IPV4, IGM, IPV6, UDP, PAY4), + [0xAC] = WX_PTT(IP, IPV4, IGM, IPV6, TCP, PAY4), + [0xAD] = WX_PTT(IP, IPV4, IGM, IPV6, SCTP, PAY4), + + /* IPv4 --> GRE/NAT --> MAC+VLAN --> NONE/IPv4/IPv6 */ + [0xB0] = WX_PTT(IP, IPV4, IGMV, NONE, NONE, PAY3), + [0xB1] = WX_PTT(IP, IPV4, IGMV, FGV4, NONE, PAY3), + [0xB2] = WX_PTT(IP, IPV4, IGMV, IPV4, NONE, PAY3), + [0xB3] = WX_PTT(IP, IPV4, IGMV, IPV4, UDP, PAY4), + [0xB4] = WX_PTT(IP, IPV4, IGMV, IPV4, TCP, PAY4), + [0xB5] = WX_PTT(IP, IPV4, IGMV, IPV4, SCTP, PAY4), + [0xB9] = WX_PTT(IP, IPV4, IGMV, FGV6, NONE, PAY3), + [0xBA] = WX_PTT(IP, IPV4, IGMV, IPV6, NONE, PAY3), + [0xBB] = WX_PTT(IP, IPV4, IGMV, IPV6, UDP, PAY4), + [0xBC] = WX_PTT(IP, IPV4, IGMV, IPV6, TCP, PAY4), + [0xBD] = WX_PTT(IP, IPV4, IGMV, IPV6, SCTP, PAY4), + + /* IPv6 --> IPv4/IPv6 */ + [0xC1] = WX_PTT(IP, IPV6, IPIP, FGV4, NONE, PAY3), + [0xC2] = WX_PTT(IP, IPV6, IPIP, IPV4, NONE, PAY3), + [0xC3] = WX_PTT(IP, IPV6, IPIP, IPV4, UDP, PAY4), + [0xC4] = WX_PTT(IP, IPV6, IPIP, IPV4, TCP, PAY4), + [0xC5] = WX_PTT(IP, IPV6, IPIP, IPV4, SCTP, PAY4), + [0xC9] = WX_PTT(IP, IPV6, IPIP, FGV6, NONE, PAY3), + [0xCA] = WX_PTT(IP, IPV6, IPIP, IPV6, NONE, PAY3), + [0xCB] = WX_PTT(IP, IPV6, IPIP, IPV6, UDP, PAY4), + [0xCC] = WX_PTT(IP, IPV6, IPIP, IPV6, TCP, PAY4), + [0xCD] = WX_PTT(IP, IPV6, IPIP, IPV6, SCTP, PAY4), + + /* IPv6 --> GRE/NAT -> NONE/IPv4/IPv6 */ + [0xD0] = WX_PTT(IP, IPV6, IG, NONE, NONE, PAY3), + [0xD1] = WX_PTT(IP, IPV6, IG, FGV4, NONE, PAY3), + [0xD2] = WX_PTT(IP, IPV6, IG, IPV4, NONE, PAY3), + [0xD3] = WX_PTT(IP, IPV6, IG, IPV4, UDP, PAY4), + [0xD4] = WX_PTT(IP, IPV6, IG, IPV4, TCP, PAY4), + [0xD5] = WX_PTT(IP, IPV6, IG, IPV4, SCTP, PAY4), + [0xD9] = WX_PTT(IP, IPV6, IG, FGV6, NONE, PAY3), + [0xDA] = WX_PTT(IP, IPV6, IG, IPV6, NONE, PAY3), + [0xDB] = WX_PTT(IP, IPV6, IG, IPV6, UDP, PAY4), + [0xDC] = WX_PTT(IP, IPV6, IG, IPV6, TCP, PAY4), + [0xDD] = WX_PTT(IP, IPV6, IG, IPV6, SCTP, PAY4), + + /* IPv6 --> GRE/NAT -> MAC -> NONE/IPv4/IPv6 */ + [0xE0] = WX_PTT(IP, IPV6, IGM, NONE, NONE, PAY3), + [0xE1] = WX_PTT(IP, IPV6, IGM, FGV4, NONE, PAY3), + [0xE2] = WX_PTT(IP, IPV6, IGM, IPV4, NONE, PAY3), + [0xE3] = WX_PTT(IP, IPV6, IGM, IPV4, UDP, PAY4), + [0xE4] = WX_PTT(IP, IPV6, IGM, IPV4, TCP, PAY4), + [0xE5] = WX_PTT(IP, IPV6, IGM, IPV4, SCTP, PAY4), + [0xE9] = WX_PTT(IP, IPV6, IGM, FGV6, NONE, PAY3), + [0xEA] = WX_PTT(IP, IPV6, IGM, IPV6, NONE, PAY3), + [0xEB] = WX_PTT(IP, IPV6, IGM, IPV6, UDP, PAY4), + [0xEC] = WX_PTT(IP, IPV6, IGM, IPV6, TCP, PAY4), + [0xED] = WX_PTT(IP, IPV6, IGM, IPV6, SCTP, PAY4), + + /* IPv6 --> GRE/NAT -> MAC--> NONE/IPv */ + [0xF0] = WX_PTT(IP, IPV6, IGMV, NONE, NONE, PAY3), + [0xF1] = WX_PTT(IP, IPV6, IGMV, FGV4, NONE, PAY3), + [0xF2] = WX_PTT(IP, IPV6, IGMV, IPV4, NONE, PAY3), + [0xF3] = WX_PTT(IP, IPV6, IGMV, IPV4, UDP, PAY4), + [0xF4] = WX_PTT(IP, IPV6, IGMV, IPV4, TCP, PAY4), + [0xF5] = WX_PTT(IP, IPV6, IGMV, IPV4, SCTP, PAY4), + [0xF9] = WX_PTT(IP, IPV6, IGMV, FGV6, NONE, PAY3), + [0xFA] = WX_PTT(IP, IPV6, IGMV, IPV6, NONE, PAY3), + [0xFB] = WX_PTT(IP, IPV6, IGMV, IPV6, UDP, PAY4), + [0xFC] = WX_PTT(IP, IPV6, IGMV, IPV6, TCP, PAY4), + [0xFD] = WX_PTT(IP, IPV6, IGMV, IPV6, SCTP, PAY4), +}; + +static struct wx_dec_ptype wx_decode_ptype(const u8 ptype) +{ + return wx_ptype_lookup[ptype]; +} + /* wx_test_staterr - tests bits in Rx descriptor status and error fields */ static __le32 wx_test_staterr(union wx_rx_desc *rx_desc, const u32 stat_err_bits) @@ -419,6 +562,116 @@ static bool wx_cleanup_headers(struct wx_ring *rx_ring, return false; } +static void wx_rx_hash(struct wx_ring *ring, + union wx_rx_desc *rx_desc, + struct sk_buff *skb) +{ + u16 rss_type; + + if (!(ring->netdev->features & NETIF_F_RXHASH)) + return; + + rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) & + WX_RXD_RSSTYPE_MASK; + + if (!rss_type) + return; + + skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), + (WX_RSS_L4_TYPES_MASK & (1ul << rss_type)) ? + PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); +} + +/** + * wx_rx_checksum - indicate in skb if hw indicated a good cksum + * @ring: structure containing ring specific data + * @rx_desc: current Rx descriptor being processed + * @skb: skb currently being received and modified + **/ +static void wx_rx_checksum(struct wx_ring *ring, + union wx_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct wx_dec_ptype dptype = wx_decode_ptype(WX_RXD_PKTTYPE(rx_desc)); + + skb_checksum_none_assert(skb); + /* Rx csum disabled */ + if (!(ring->netdev->features & NETIF_F_RXCSUM)) + return; + + /* if IPv4 header checksum error */ + if ((wx_test_staterr(rx_desc, WX_RXD_STAT_IPCS) && + wx_test_staterr(rx_desc, WX_RXD_ERR_IPE)) || + (wx_test_staterr(rx_desc, WX_RXD_STAT_OUTERIPCS) && + wx_test_staterr(rx_desc, WX_RXD_ERR_OUTERIPER))) { + ring->rx_stats.csum_err++; + return; + } + + /* L4 checksum offload flag must set for the below code to work */ + if (!wx_test_staterr(rx_desc, WX_RXD_STAT_L4CS)) + return; + + /* Hardware can't guarantee csum if IPv6 Dest Header found */ + if (dptype.prot != WX_DEC_PTYPE_PROT_SCTP && WX_RXD_IPV6EX(rx_desc)) + return; + + /* if L4 checksum error */ + if (wx_test_staterr(rx_desc, WX_RXD_ERR_TCPE)) { + ring->rx_stats.csum_err++; + return; + } + + /* It must be a TCP or UDP or SCTP packet with a valid checksum */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* If there is an outer header present that might contain a checksum + * we need to bump the checksum level by 1 to reflect the fact that + * we are indicating we validated the inner checksum. + */ + if (dptype.etype >= WX_DEC_PTYPE_ETYPE_IG) + __skb_incr_checksum_unnecessary(skb); + ring->rx_stats.csum_good_cnt++; +} + +static void wx_rx_vlan(struct wx_ring *ring, union wx_rx_desc *rx_desc, + struct sk_buff *skb) +{ + u16 ethertype; + u8 idx = 0; + + if ((ring->netdev->features & + (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) && + wx_test_staterr(rx_desc, WX_RXD_STAT_VP)) { + idx = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) & + 0x1c0) >> 6; + ethertype = ring->q_vector->wx->tpid[idx]; + __vlan_hwaccel_put_tag(skb, htons(ethertype), + le16_to_cpu(rx_desc->wb.upper.vlan)); + } +} + +/** + * wx_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, protocol, and + * other fields within the skb. + **/ +static void wx_process_skb_fields(struct wx_ring *rx_ring, + union wx_rx_desc *rx_desc, + struct sk_buff *skb) +{ + wx_rx_hash(rx_ring, rx_desc, skb); + wx_rx_checksum(rx_ring, rx_desc, skb); + wx_rx_vlan(rx_ring, rx_desc, skb); + skb_record_rx_queue(skb, rx_ring->queue_index); + skb->protocol = eth_type_trans(skb, rx_ring->netdev); +} + /** * wx_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @q_vector: structure containing interrupt and ring information @@ -486,8 +739,8 @@ static int wx_clean_rx_irq(struct wx_q_vector *q_vector, /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; - skb_record_rx_queue(skb, rx_ring->queue_index); - skb->protocol = eth_type_trans(skb, rx_ring->netdev); + /* populate checksum, timestamp, VLAN, and protocol */ + wx_process_skb_fields(rx_ring, rx_desc, skb); napi_gro_receive(&q_vector->napi, skb); /* update budget accounting */ @@ -707,11 +960,50 @@ static int wx_maybe_stop_tx(struct wx_ring *tx_ring, u16 size) return 0; } +static u32 wx_tx_cmd_type(u32 tx_flags) +{ + /* set type for advanced descriptor with frame checksum insertion */ + u32 cmd_type = WX_TXD_DTYP_DATA | WX_TXD_IFCS; + + /* set HW vlan bit if vlan is present */ + cmd_type |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_HW_VLAN, WX_TXD_VLE); + /* set segmentation enable bits for TSO/FSO */ + cmd_type |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_TSO, WX_TXD_TSE); + /* set timestamp bit if present */ + cmd_type |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_TSTAMP, WX_TXD_MAC_TSTAMP); + cmd_type |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_LINKSEC, WX_TXD_LINKSEC); + + return cmd_type; +} + +static void wx_tx_olinfo_status(union wx_tx_desc *tx_desc, + u32 tx_flags, unsigned int paylen) +{ + u32 olinfo_status = paylen << WX_TXD_PAYLEN_SHIFT; + + /* enable L4 checksum for TSO and TX checksum offload */ + olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_CSUM, WX_TXD_L4CS); + /* enable IPv4 checksum for TSO */ + olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_IPV4, WX_TXD_IIPCS); + /* enable outer IPv4 checksum for TSO */ + olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_OUTER_IPV4, + WX_TXD_EIPCS); + /* Check Context must be set if Tx switch is enabled, which it + * always is for case where virtual functions are running + */ + olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_CC, WX_TXD_CC); + olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_IPSEC, + WX_TXD_IPSEC); + tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); +} + static void wx_tx_map(struct wx_ring *tx_ring, - struct wx_tx_buffer *first) + struct wx_tx_buffer *first, + const u8 hdr_len) { struct sk_buff *skb = first->skb; struct wx_tx_buffer *tx_buffer; + u32 tx_flags = first->tx_flags; u16 i = tx_ring->next_to_use; unsigned int data_len, size; union wx_tx_desc *tx_desc; @@ -719,10 +1011,9 @@ static void wx_tx_map(struct wx_ring *tx_ring, dma_addr_t dma; u32 cmd_type; - cmd_type = WX_TXD_DTYP_DATA | WX_TXD_IFCS; + cmd_type = wx_tx_cmd_type(tx_flags); tx_desc = WX_TX_DESC(tx_ring, i); - - tx_desc->read.olinfo_status = cpu_to_le32(skb->len << WX_TXD_PAYLEN_SHIFT); + wx_tx_olinfo_status(tx_desc, tx_flags, skb->len - hdr_len); size = skb_headlen(skb); data_len = skb->data_len; @@ -838,12 +1129,399 @@ dma_error: tx_ring->next_to_use = i; } +static void wx_tx_ctxtdesc(struct wx_ring *tx_ring, u32 vlan_macip_lens, + u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx) +{ + struct wx_tx_context_desc *context_desc; + u16 i = tx_ring->next_to_use; + + context_desc = WX_TX_CTXTDESC(tx_ring, i); + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; + + /* set bits to identify this as an advanced context descriptor */ + type_tucmd |= WX_TXD_DTYP_CTXT; + context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); + context_desc->seqnum_seed = cpu_to_le32(fcoe_sof_eof); + context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); + context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); +} + +static void wx_get_ipv6_proto(struct sk_buff *skb, int offset, u8 *nexthdr) +{ + struct ipv6hdr *hdr = (struct ipv6hdr *)(skb->data + offset); + + *nexthdr = hdr->nexthdr; + offset += sizeof(struct ipv6hdr); + while (ipv6_ext_hdr(*nexthdr)) { + struct ipv6_opt_hdr _hdr, *hp; + + if (*nexthdr == NEXTHDR_NONE) + return; + hp = skb_header_pointer(skb, offset, sizeof(_hdr), &_hdr); + if (!hp) + return; + if (*nexthdr == NEXTHDR_FRAGMENT) + break; + *nexthdr = hp->nexthdr; + } +} + +union network_header { + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + void *raw; +}; + +static u8 wx_encode_tx_desc_ptype(const struct wx_tx_buffer *first) +{ + u8 tun_prot = 0, l4_prot = 0, ptype = 0; + struct sk_buff *skb = first->skb; + + if (skb->encapsulation) { + union network_header hdr; + + switch (first->protocol) { + case htons(ETH_P_IP): + tun_prot = ip_hdr(skb)->protocol; + ptype = WX_PTYPE_TUN_IPV4; + break; + case htons(ETH_P_IPV6): + wx_get_ipv6_proto(skb, skb_network_offset(skb), &tun_prot); + ptype = WX_PTYPE_TUN_IPV6; + break; + default: + return ptype; + } + + if (tun_prot == IPPROTO_IPIP) { + hdr.raw = (void *)inner_ip_hdr(skb); + ptype |= WX_PTYPE_PKT_IPIP; + } else if (tun_prot == IPPROTO_UDP) { + hdr.raw = (void *)inner_ip_hdr(skb); + if (skb->inner_protocol_type != ENCAP_TYPE_ETHER || + skb->inner_protocol != htons(ETH_P_TEB)) { + ptype |= WX_PTYPE_PKT_IG; + } else { + if (((struct ethhdr *)skb_inner_mac_header(skb))->h_proto + == htons(ETH_P_8021Q)) + ptype |= WX_PTYPE_PKT_IGMV; + else + ptype |= WX_PTYPE_PKT_IGM; + } + + } else if (tun_prot == IPPROTO_GRE) { + hdr.raw = (void *)inner_ip_hdr(skb); + if (skb->inner_protocol == htons(ETH_P_IP) || + skb->inner_protocol == htons(ETH_P_IPV6)) { + ptype |= WX_PTYPE_PKT_IG; + } else { + if (((struct ethhdr *)skb_inner_mac_header(skb))->h_proto + == htons(ETH_P_8021Q)) + ptype |= WX_PTYPE_PKT_IGMV; + else + ptype |= WX_PTYPE_PKT_IGM; + } + } else { + return ptype; + } + + switch (hdr.ipv4->version) { + case IPVERSION: + l4_prot = hdr.ipv4->protocol; + break; + case 6: + wx_get_ipv6_proto(skb, skb_inner_network_offset(skb), &l4_prot); + ptype |= WX_PTYPE_PKT_IPV6; + break; + default: + return ptype; + } + } else { + switch (first->protocol) { + case htons(ETH_P_IP): + l4_prot = ip_hdr(skb)->protocol; + ptype = WX_PTYPE_PKT_IP; + break; + case htons(ETH_P_IPV6): + wx_get_ipv6_proto(skb, skb_network_offset(skb), &l4_prot); + ptype = WX_PTYPE_PKT_IP | WX_PTYPE_PKT_IPV6; + break; + default: + return WX_PTYPE_PKT_MAC | WX_PTYPE_TYP_MAC; + } + } + switch (l4_prot) { + case IPPROTO_TCP: + ptype |= WX_PTYPE_TYP_TCP; + break; + case IPPROTO_UDP: + ptype |= WX_PTYPE_TYP_UDP; + break; + case IPPROTO_SCTP: + ptype |= WX_PTYPE_TYP_SCTP; + break; + default: + ptype |= WX_PTYPE_TYP_IP; + break; + } + + return ptype; +} + +static int wx_tso(struct wx_ring *tx_ring, struct wx_tx_buffer *first, + u8 *hdr_len, u8 ptype) +{ + u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; + struct net_device *netdev = tx_ring->netdev; + u32 l4len, tunhdr_eiplen_tunlen = 0; + struct sk_buff *skb = first->skb; + bool enc = skb->encapsulation; + struct ipv6hdr *ipv6h; + struct tcphdr *tcph; + struct iphdr *iph; + u8 tun_prot = 0; + int err; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (!skb_is_gso(skb)) + return 0; + + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + /* indicates the inner headers in the skbuff are valid. */ + iph = enc ? inner_ip_hdr(skb) : ip_hdr(skb); + if (iph->version == 4) { + tcph = enc ? inner_tcp_hdr(skb) : tcp_hdr(skb); + iph->tot_len = 0; + iph->check = 0; + tcph->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, 0); + first->tx_flags |= WX_TX_FLAGS_TSO | + WX_TX_FLAGS_CSUM | + WX_TX_FLAGS_IPV4 | + WX_TX_FLAGS_CC; + } else if (iph->version == 6 && skb_is_gso_v6(skb)) { + ipv6h = enc ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + tcph = enc ? inner_tcp_hdr(skb) : tcp_hdr(skb); + ipv6h->payload_len = 0; + tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, + &ipv6h->daddr, 0, + IPPROTO_TCP, 0); + first->tx_flags |= WX_TX_FLAGS_TSO | + WX_TX_FLAGS_CSUM | + WX_TX_FLAGS_CC; + } + + /* compute header lengths */ + l4len = enc ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); + *hdr_len = enc ? (skb_inner_transport_header(skb) - skb->data) : + skb_transport_offset(skb); + *hdr_len += l4len; + + /* update gso size and bytecount with header size */ + first->gso_segs = skb_shinfo(skb)->gso_segs; + first->bytecount += (first->gso_segs - 1) * *hdr_len; + + /* mss_l4len_id: use 0 as index for TSO */ + mss_l4len_idx = l4len << WX_TXD_L4LEN_SHIFT; + mss_l4len_idx |= skb_shinfo(skb)->gso_size << WX_TXD_MSS_SHIFT; + + /* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */ + if (enc) { + switch (first->protocol) { + case htons(ETH_P_IP): + tun_prot = ip_hdr(skb)->protocol; + first->tx_flags |= WX_TX_FLAGS_OUTER_IPV4; + break; + case htons(ETH_P_IPV6): + tun_prot = ipv6_hdr(skb)->nexthdr; + break; + default: + break; + } + switch (tun_prot) { + case IPPROTO_UDP: + tunhdr_eiplen_tunlen = WX_TXD_TUNNEL_UDP; + tunhdr_eiplen_tunlen |= ((skb_network_header_len(skb) >> 2) << + WX_TXD_OUTER_IPLEN_SHIFT) | + (((skb_inner_mac_header(skb) - + skb_transport_header(skb)) >> 1) << + WX_TXD_TUNNEL_LEN_SHIFT); + break; + case IPPROTO_GRE: + tunhdr_eiplen_tunlen = WX_TXD_TUNNEL_GRE; + tunhdr_eiplen_tunlen |= ((skb_network_header_len(skb) >> 2) << + WX_TXD_OUTER_IPLEN_SHIFT) | + (((skb_inner_mac_header(skb) - + skb_transport_header(skb)) >> 1) << + WX_TXD_TUNNEL_LEN_SHIFT); + break; + case IPPROTO_IPIP: + tunhdr_eiplen_tunlen = (((char *)inner_ip_hdr(skb) - + (char *)ip_hdr(skb)) >> 2) << + WX_TXD_OUTER_IPLEN_SHIFT; + break; + default: + break; + } + vlan_macip_lens = skb_inner_network_header_len(skb) >> 1; + } else { + vlan_macip_lens = skb_network_header_len(skb) >> 1; + } + + vlan_macip_lens |= skb_network_offset(skb) << WX_TXD_MACLEN_SHIFT; + vlan_macip_lens |= first->tx_flags & WX_TX_FLAGS_VLAN_MASK; + + type_tucmd = ptype << 24; + if (skb->vlan_proto == htons(ETH_P_8021AD) && + netdev->features & NETIF_F_HW_VLAN_STAG_TX) + type_tucmd |= WX_SET_FLAG(first->tx_flags, + WX_TX_FLAGS_HW_VLAN, + 0x1 << WX_TXD_TAG_TPID_SEL_SHIFT); + wx_tx_ctxtdesc(tx_ring, vlan_macip_lens, tunhdr_eiplen_tunlen, + type_tucmd, mss_l4len_idx); + + return 1; +} + +static void wx_tx_csum(struct wx_ring *tx_ring, struct wx_tx_buffer *first, + u8 ptype) +{ + u32 tunhdr_eiplen_tunlen = 0, vlan_macip_lens = 0; + struct net_device *netdev = tx_ring->netdev; + u32 mss_l4len_idx = 0, type_tucmd; + struct sk_buff *skb = first->skb; + u8 tun_prot = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(first->tx_flags & WX_TX_FLAGS_HW_VLAN) && + !(first->tx_flags & WX_TX_FLAGS_CC)) + return; + vlan_macip_lens = skb_network_offset(skb) << + WX_TXD_MACLEN_SHIFT; + } else { + u8 l4_prot = 0; + union { + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + u8 *raw; + } network_hdr; + union { + struct tcphdr *tcphdr; + u8 *raw; + } transport_hdr; + + if (skb->encapsulation) { + network_hdr.raw = skb_inner_network_header(skb); + transport_hdr.raw = skb_inner_transport_header(skb); + vlan_macip_lens = skb_network_offset(skb) << + WX_TXD_MACLEN_SHIFT; + switch (first->protocol) { + case htons(ETH_P_IP): + tun_prot = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + tun_prot = ipv6_hdr(skb)->nexthdr; + break; + default: + return; + } + switch (tun_prot) { + case IPPROTO_UDP: + tunhdr_eiplen_tunlen = WX_TXD_TUNNEL_UDP; + tunhdr_eiplen_tunlen |= + ((skb_network_header_len(skb) >> 2) << + WX_TXD_OUTER_IPLEN_SHIFT) | + (((skb_inner_mac_header(skb) - + skb_transport_header(skb)) >> 1) << + WX_TXD_TUNNEL_LEN_SHIFT); + break; + case IPPROTO_GRE: + tunhdr_eiplen_tunlen = WX_TXD_TUNNEL_GRE; + tunhdr_eiplen_tunlen |= ((skb_network_header_len(skb) >> 2) << + WX_TXD_OUTER_IPLEN_SHIFT) | + (((skb_inner_mac_header(skb) - + skb_transport_header(skb)) >> 1) << + WX_TXD_TUNNEL_LEN_SHIFT); + break; + case IPPROTO_IPIP: + tunhdr_eiplen_tunlen = (((char *)inner_ip_hdr(skb) - + (char *)ip_hdr(skb)) >> 2) << + WX_TXD_OUTER_IPLEN_SHIFT; + break; + default: + break; + } + + } else { + network_hdr.raw = skb_network_header(skb); + transport_hdr.raw = skb_transport_header(skb); + vlan_macip_lens = skb_network_offset(skb) << + WX_TXD_MACLEN_SHIFT; + } + + switch (network_hdr.ipv4->version) { + case IPVERSION: + vlan_macip_lens |= (transport_hdr.raw - network_hdr.raw) >> 1; + l4_prot = network_hdr.ipv4->protocol; + break; + case 6: + vlan_macip_lens |= (transport_hdr.raw - network_hdr.raw) >> 1; + l4_prot = network_hdr.ipv6->nexthdr; + break; + default: + break; + } + + switch (l4_prot) { + case IPPROTO_TCP: + mss_l4len_idx = (transport_hdr.tcphdr->doff * 4) << + WX_TXD_L4LEN_SHIFT; + break; + case IPPROTO_SCTP: + mss_l4len_idx = sizeof(struct sctphdr) << + WX_TXD_L4LEN_SHIFT; + break; + case IPPROTO_UDP: + mss_l4len_idx = sizeof(struct udphdr) << + WX_TXD_L4LEN_SHIFT; + break; + default: + break; + } + + /* update TX checksum flag */ + first->tx_flags |= WX_TX_FLAGS_CSUM; + } + first->tx_flags |= WX_TX_FLAGS_CC; + /* vlan_macip_lens: MACLEN, VLAN tag */ + vlan_macip_lens |= first->tx_flags & WX_TX_FLAGS_VLAN_MASK; + + type_tucmd = ptype << 24; + if (skb->vlan_proto == htons(ETH_P_8021AD) && + netdev->features & NETIF_F_HW_VLAN_STAG_TX) + type_tucmd |= WX_SET_FLAG(first->tx_flags, + WX_TX_FLAGS_HW_VLAN, + 0x1 << WX_TXD_TAG_TPID_SEL_SHIFT); + wx_tx_ctxtdesc(tx_ring, vlan_macip_lens, tunhdr_eiplen_tunlen, + type_tucmd, mss_l4len_idx); +} + static netdev_tx_t wx_xmit_frame_ring(struct sk_buff *skb, struct wx_ring *tx_ring) { u16 count = TXD_USE_COUNT(skb_headlen(skb)); struct wx_tx_buffer *first; + u8 hdr_len = 0, ptype; unsigned short f; + u32 tx_flags = 0; + int tso; /* need: 1 descriptor per page * PAGE_SIZE/WX_MAX_DATA_PER_TXD, * + 1 desc for skb_headlen/WX_MAX_DATA_PER_TXD, @@ -864,7 +1542,29 @@ static netdev_tx_t wx_xmit_frame_ring(struct sk_buff *skb, first->bytecount = skb->len; first->gso_segs = 1; - wx_tx_map(tx_ring, first); + /* if we have a HW VLAN tag being added default to the HW one */ + if (skb_vlan_tag_present(skb)) { + tx_flags |= skb_vlan_tag_get(skb) << WX_TX_FLAGS_VLAN_SHIFT; + tx_flags |= WX_TX_FLAGS_HW_VLAN; + } + + /* record initial flags and protocol */ + first->tx_flags = tx_flags; + first->protocol = vlan_get_protocol(skb); + + ptype = wx_encode_tx_desc_ptype(first); + + tso = wx_tso(tx_ring, first, &hdr_len, ptype); + if (tso < 0) + goto out_drop; + else if (!tso) + wx_tx_csum(tx_ring, first, ptype); + wx_tx_map(tx_ring, first, hdr_len); + + return NETDEV_TX_OK; +out_drop: + dev_kfree_skb_any(first->skb); + first->skb = NULL; return NETDEV_TX_OK; } @@ -1348,7 +2048,8 @@ void wx_free_irq(struct wx *wx) free_irq(entry->vector, q_vector); } - free_irq(wx->msix_entries[vector].vector, wx); + if (wx->mac.type == wx_mac_em) + free_irq(wx->msix_entries[vector].vector, wx); } EXPORT_SYMBOL(wx_free_irq); @@ -2004,4 +2705,24 @@ void wx_get_stats64(struct net_device *netdev, } EXPORT_SYMBOL(wx_get_stats64); +int wx_set_features(struct net_device *netdev, netdev_features_t features) +{ + netdev_features_t changed = netdev->features ^ features; + struct wx *wx = netdev_priv(netdev); + + if (changed & NETIF_F_RXHASH) + wr32m(wx, WX_RDB_RA_CTL, WX_RDB_RA_CTL_RSS_EN, + WX_RDB_RA_CTL_RSS_EN); + else + wr32m(wx, WX_RDB_RA_CTL, WX_RDB_RA_CTL_RSS_EN, 0); + + if (changed & + (NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_RX)) + wx_set_rx_mode(netdev); + + return 1; +} +EXPORT_SYMBOL(wx_set_features); + MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_lib.h index 50ee41f1fa10..df1f4a5951f0 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.h @@ -28,5 +28,6 @@ void wx_free_resources(struct wx *wx); int wx_setup_resources(struct wx *wx); void wx_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats); +int wx_set_features(struct net_device *netdev, netdev_features_t features); #endif /* _NGBE_LIB_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 32f952d93009..29dfb561887d 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -6,6 +6,8 @@ #include <linux/bitfield.h> #include <linux/netdevice.h> +#include <linux/if_vlan.h> +#include <net/ip.h> #define WX_NCSI_SUP 0x8000 #define WX_NCSI_MASK 0x8000 @@ -64,6 +66,8 @@ #define WX_CFG_PORT_CTL_QINQ BIT(2) #define WX_CFG_PORT_CTL_D_VLAN BIT(0) /* double vlan*/ #define WX_CFG_TAG_TPID(_i) (0x14430 + ((_i) * 4)) +#define WX_CFG_PORT_CTL_NUM_VT_MASK GENMASK(13, 12) /* number of TVs */ + /* GPIO Registers */ #define WX_GPIO_DR 0x14800 @@ -79,7 +83,9 @@ #define WX_GPIO_INTMASK 0x14834 #define WX_GPIO_INTTYPE_LEVEL 0x14838 #define WX_GPIO_POLARITY 0x1483C +#define WX_GPIO_INTSTATUS 0x14844 #define WX_GPIO_EOI 0x1484C +#define WX_GPIO_EXT 0x14850 /*********************** Transmit DMA registers **************************/ /* transmit global control */ @@ -87,6 +93,8 @@ /* TDM CTL BIT */ #define WX_TDM_CTL_TE BIT(0) /* Transmit Enable */ #define WX_TDM_PB_THRE(_i) (0x18020 + ((_i) * 4)) +#define WX_TDM_RP_IDX 0x1820C +#define WX_TDM_RP_RATE 0x18404 /***************************** RDB registers *********************************/ /* receive packet buffer */ @@ -105,6 +113,8 @@ #define WX_RDB_PL_CFG_L2HDR BIT(3) #define WX_RDB_PL_CFG_TUN_TUNHDR BIT(4) #define WX_RDB_PL_CFG_TUN_OUTL2HDR BIT(5) +#define WX_RDB_RA_CTL 0x194F4 +#define WX_RDB_RA_CTL_RSS_EN BIT(2) /* RSS Enable */ /******************************* PSR Registers *******************************/ /* psr control */ @@ -150,6 +160,9 @@ #define WX_PSR_LAN_FLEX_DW_H(_i) (0x15C04 + ((_i) * 16)) #define WX_PSR_LAN_FLEX_MSK(_i) (0x15C08 + ((_i) * 16)) +/* vlan tbl */ +#define WX_PSR_VLAN_TBL(_i) (0x16000 + ((_i) * 4)) + /* mac switcher */ #define WX_PSR_MAC_SWC_AD_L 0x16200 #define WX_PSR_MAC_SWC_AD_H 0x16204 @@ -161,6 +174,15 @@ #define WX_PSR_MAC_SWC_IDX 0x16210 #define WX_CLEAR_VMDQ_ALL 0xFFFFFFFFU +/* vlan switch */ +#define WX_PSR_VLAN_SWC 0x16220 +#define WX_PSR_VLAN_SWC_VM_L 0x16224 +#define WX_PSR_VLAN_SWC_VM_H 0x16228 +#define WX_PSR_VLAN_SWC_IDX 0x16230 /* 64 vlan entries */ +/* VLAN pool filtering masks */ +#define WX_PSR_VLAN_SWC_VIEN BIT(31) /* filter is valid */ +#define WX_PSR_VLAN_SWC_ENTRIES 64 + /********************************* RSEC **************************************/ /* general rsec */ #define WX_RSC_CTL 0x17000 @@ -255,6 +277,7 @@ #define WX_PX_RR_RP(_i) (0x0100C + ((_i) * 0x40)) #define WX_PX_RR_CFG(_i) (0x01010 + ((_i) * 0x40)) /* PX_RR_CFG bit definitions */ +#define WX_PX_RR_CFG_VLAN BIT(31) #define WX_PX_RR_CFG_SPLIT_MODE BIT(26) #define WX_PX_RR_CFG_RR_THER_SHIFT 16 #define WX_PX_RR_CFG_RR_HDR_SZ GENMASK(15, 12) @@ -296,6 +319,7 @@ #define WX_MAX_TXD 8192 #define WX_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */ +#define VMDQ_P(p) p /* Supported Rx Buffer Sizes */ #define WX_RXBUFFER_256 256 /* Used for skb receive header */ @@ -315,17 +339,64 @@ #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), WX_MAX_DATA_PER_TXD) #define DESC_NEEDED (MAX_SKB_FRAGS + 4) -/* Ether Types */ -#define WX_ETH_P_CNM 0x22E7 - #define WX_CFG_PORT_ST 0x14404 /******************* Receive Descriptor bit definitions **********************/ #define WX_RXD_STAT_DD BIT(0) /* Done */ #define WX_RXD_STAT_EOP BIT(1) /* End of Packet */ +#define WX_RXD_STAT_VP BIT(5) /* IEEE VLAN Pkt */ +#define WX_RXD_STAT_L4CS BIT(7) /* L4 xsum calculated */ +#define WX_RXD_STAT_IPCS BIT(8) /* IP xsum calculated */ +#define WX_RXD_STAT_OUTERIPCS BIT(10) /* Cloud IP xsum calculated*/ +#define WX_RXD_ERR_OUTERIPER BIT(26) /* CRC IP Header error */ #define WX_RXD_ERR_RXE BIT(29) /* Any MAC Error */ - +#define WX_RXD_ERR_TCPE BIT(30) /* TCP/UDP Checksum Error */ +#define WX_RXD_ERR_IPE BIT(31) /* IP Checksum Error */ + +/* RSS Hash results */ +#define WX_RXD_RSSTYPE_MASK GENMASK(3, 0) +#define WX_RXD_RSSTYPE_IPV4_TCP 0x00000001U +#define WX_RXD_RSSTYPE_IPV6_TCP 0x00000003U +#define WX_RXD_RSSTYPE_IPV4_SCTP 0x00000004U +#define WX_RXD_RSSTYPE_IPV6_SCTP 0x00000006U +#define WX_RXD_RSSTYPE_IPV4_UDP 0x00000007U +#define WX_RXD_RSSTYPE_IPV6_UDP 0x00000008U + +#define WX_RSS_L4_TYPES_MASK \ + ((1ul << WX_RXD_RSSTYPE_IPV4_TCP) | \ + (1ul << WX_RXD_RSSTYPE_IPV4_UDP) | \ + (1ul << WX_RXD_RSSTYPE_IPV4_SCTP) | \ + (1ul << WX_RXD_RSSTYPE_IPV6_TCP) | \ + (1ul << WX_RXD_RSSTYPE_IPV6_UDP) | \ + (1ul << WX_RXD_RSSTYPE_IPV6_SCTP)) +/* TUN */ +#define WX_PTYPE_TUN_IPV4 0x80 +#define WX_PTYPE_TUN_IPV6 0xC0 + +/* PKT for TUN */ +#define WX_PTYPE_PKT_IPIP 0x00 /* IP+IP */ +#define WX_PTYPE_PKT_IG 0x10 /* IP+GRE */ +#define WX_PTYPE_PKT_IGM 0x20 /* IP+GRE+MAC */ +#define WX_PTYPE_PKT_IGMV 0x30 /* IP+GRE+MAC+VLAN */ +/* PKT for !TUN */ +#define WX_PTYPE_PKT_MAC 0x10 +#define WX_PTYPE_PKT_IP 0x20 + +/* TYP for PKT=mac */ +#define WX_PTYPE_TYP_MAC 0x01 +/* TYP for PKT=ip */ +#define WX_PTYPE_PKT_IPV6 0x08 +#define WX_PTYPE_TYP_IPFRAG 0x01 +#define WX_PTYPE_TYP_IP 0x02 +#define WX_PTYPE_TYP_UDP 0x03 +#define WX_PTYPE_TYP_TCP 0x04 +#define WX_PTYPE_TYP_SCTP 0x05 + +#define WX_RXD_PKTTYPE(_rxd) \ + ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 9) & 0xFF) +#define WX_RXD_IPV6EX(_rxd) \ + ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 6) & 0x1) /*********************** Transmit Descriptor Config Masks ****************/ #define WX_TXD_STAT_DD BIT(0) /* Descriptor Done */ #define WX_TXD_DTYP_DATA 0 /* Adv Data Descriptor */ @@ -334,6 +405,113 @@ #define WX_TXD_IFCS BIT(25) /* Insert FCS */ #define WX_TXD_RS BIT(27) /* Report Status */ +/*********************** Adv Transmit Descriptor Config Masks ****************/ +#define WX_TXD_MAC_TSTAMP BIT(19) /* IEEE1588 time stamp */ +#define WX_TXD_DTYP_CTXT BIT(20) /* Adv Context Desc */ +#define WX_TXD_LINKSEC BIT(26) /* enable linksec */ +#define WX_TXD_VLE BIT(30) /* VLAN pkt enable */ +#define WX_TXD_TSE BIT(31) /* TCP Seg enable */ +#define WX_TXD_CC BIT(7) /* Check Context */ +#define WX_TXD_IPSEC BIT(8) /* enable ipsec esp */ +#define WX_TXD_L4CS BIT(9) +#define WX_TXD_IIPCS BIT(10) +#define WX_TXD_EIPCS BIT(11) +#define WX_TXD_PAYLEN_SHIFT 13 /* Adv desc PAYLEN shift */ +#define WX_TXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ +#define WX_TXD_TAG_TPID_SEL_SHIFT 11 + +#define WX_TXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ +#define WX_TXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ + +#define WX_TXD_OUTER_IPLEN_SHIFT 12 /* Adv ctxt OUTERIPLEN shift */ +#define WX_TXD_TUNNEL_LEN_SHIFT 21 /* Adv ctxt TUNNELLEN shift */ +#define WX_TXD_TUNNEL_TYPE_SHIFT 11 /* Adv Tx Desc Tunnel Type shift */ +#define WX_TXD_TUNNEL_UDP FIELD_PREP(BIT(WX_TXD_TUNNEL_TYPE_SHIFT), 0) +#define WX_TXD_TUNNEL_GRE FIELD_PREP(BIT(WX_TXD_TUNNEL_TYPE_SHIFT), 1) + +enum wx_tx_flags { + /* cmd_type flags */ + WX_TX_FLAGS_HW_VLAN = 0x01, + WX_TX_FLAGS_TSO = 0x02, + WX_TX_FLAGS_TSTAMP = 0x04, + + /* olinfo flags */ + WX_TX_FLAGS_CC = 0x08, + WX_TX_FLAGS_IPV4 = 0x10, + WX_TX_FLAGS_CSUM = 0x20, + WX_TX_FLAGS_OUTER_IPV4 = 0x100, + WX_TX_FLAGS_LINKSEC = 0x200, + WX_TX_FLAGS_IPSEC = 0x400, +}; + +/* VLAN info */ +#define WX_TX_FLAGS_VLAN_MASK GENMASK(31, 16) +#define WX_TX_FLAGS_VLAN_SHIFT 16 + +/* wx_dec_ptype.mac: outer mac */ +enum wx_dec_ptype_mac { + WX_DEC_PTYPE_MAC_IP = 0, + WX_DEC_PTYPE_MAC_L2 = 2, + WX_DEC_PTYPE_MAC_FCOE = 3, +}; + +/* wx_dec_ptype.[e]ip: outer&encaped ip */ +#define WX_DEC_PTYPE_IP_FRAG 0x4 +enum wx_dec_ptype_ip { + WX_DEC_PTYPE_IP_NONE = 0, + WX_DEC_PTYPE_IP_IPV4 = 1, + WX_DEC_PTYPE_IP_IPV6 = 2, + WX_DEC_PTYPE_IP_FGV4 = WX_DEC_PTYPE_IP_FRAG | WX_DEC_PTYPE_IP_IPV4, + WX_DEC_PTYPE_IP_FGV6 = WX_DEC_PTYPE_IP_FRAG | WX_DEC_PTYPE_IP_IPV6, +}; + +/* wx_dec_ptype.etype: encaped type */ +enum wx_dec_ptype_etype { + WX_DEC_PTYPE_ETYPE_NONE = 0, + WX_DEC_PTYPE_ETYPE_IPIP = 1, /* IP+IP */ + WX_DEC_PTYPE_ETYPE_IG = 2, /* IP+GRE */ + WX_DEC_PTYPE_ETYPE_IGM = 3, /* IP+GRE+MAC */ + WX_DEC_PTYPE_ETYPE_IGMV = 4, /* IP+GRE+MAC+VLAN */ +}; + +/* wx_dec_ptype.proto: payload proto */ +enum wx_dec_ptype_prot { + WX_DEC_PTYPE_PROT_NONE = 0, + WX_DEC_PTYPE_PROT_UDP = 1, + WX_DEC_PTYPE_PROT_TCP = 2, + WX_DEC_PTYPE_PROT_SCTP = 3, + WX_DEC_PTYPE_PROT_ICMP = 4, + WX_DEC_PTYPE_PROT_TS = 5, /* time sync */ +}; + +/* wx_dec_ptype.layer: payload layer */ +enum wx_dec_ptype_layer { + WX_DEC_PTYPE_LAYER_NONE = 0, + WX_DEC_PTYPE_LAYER_PAY2 = 1, + WX_DEC_PTYPE_LAYER_PAY3 = 2, + WX_DEC_PTYPE_LAYER_PAY4 = 3, +}; + +struct wx_dec_ptype { + u32 known:1; + u32 mac:2; /* outer mac */ + u32 ip:3; /* outer ip*/ + u32 etype:3; /* encaped type */ + u32 eip:3; /* encaped ip */ + u32 prot:4; /* payload proto */ + u32 layer:3; /* payload layer */ +}; + +/* macro to make the table lines short */ +#define WX_PTT(mac, ip, etype, eip, proto, layer)\ + {1, \ + WX_DEC_PTYPE_MAC_##mac, /* mac */\ + WX_DEC_PTYPE_IP_##ip, /* ip */ \ + WX_DEC_PTYPE_ETYPE_##etype, /* etype */\ + WX_DEC_PTYPE_IP_##eip, /* eip */\ + WX_DEC_PTYPE_PROT_##proto, /* proto */\ + WX_DEC_PTYPE_LAYER_##layer /* layer */} + /* Host Interface Command Structures */ struct wx_hic_hdr { u8 cmd; @@ -412,6 +590,8 @@ struct wx_mac_info { u32 mta_shadow[128]; s32 mc_filter_type; u32 mcft_size; + u32 vft_shadow[128]; + u32 vft_size; u32 num_rar_entries; u32 rx_pb_size; u32 tx_pb_size; @@ -508,10 +688,25 @@ union wx_rx_desc { } wb; /* writeback */ }; +struct wx_tx_context_desc { + __le32 vlan_macip_lens; + __le32 seqnum_seed; + __le32 type_tucmd_mlhl; + __le32 mss_l4len_idx; +}; + +/* if _flag is in _input, return _result */ +#define WX_SET_FLAG(_input, _flag, _result) \ + (((_flag) <= (_result)) ? \ + ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ + ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) + #define WX_RX_DESC(R, i) \ (&(((union wx_rx_desc *)((R)->desc))[i])) #define WX_TX_DESC(R, i) \ (&(((union wx_tx_desc *)((R)->desc))[i])) +#define WX_TX_CTXTDESC(R, i) \ + (&(((struct wx_tx_context_desc *)((R)->desc))[i])) /* wrapper around a pointer to a socket buffer, * so a DMA handle can be stored along with the buffer @@ -523,6 +718,8 @@ struct wx_tx_buffer { unsigned short gso_segs; DEFINE_DMA_UNMAP_ADDR(dma); DEFINE_DMA_UNMAP_LEN(len); + __be16 protocol; + u32 tx_flags; }; struct wx_rx_buffer { @@ -539,6 +736,11 @@ struct wx_queue_stats { u64 bytes; }; +struct wx_rx_queue_stats { + u64 csum_good_cnt; + u64 csum_err; +}; + /* iterator for handling rings in ring container */ #define wx_for_each_ring(posm, headm) \ for (posm = (headm).ring; posm; posm = posm->next) @@ -550,7 +752,6 @@ struct wx_ring_container { u8 count; /* total number of rings in vector */ u8 itr; /* current ITR setting for ring */ }; - struct wx_ring { struct wx_ring *next; /* pointer to next ring in q_vector */ struct wx_q_vector *q_vector; /* backpointer to host q_vector */ @@ -580,6 +781,9 @@ struct wx_ring { struct wx_queue_stats stats; struct u64_stats_sync syncp; + union { + struct wx_rx_queue_stats rx_stats; + }; } ____cacheline_internodealigned_in_smp; struct wx_q_vector { @@ -598,7 +802,7 @@ struct wx_q_vector { char name[IFNAMSIZ + 17]; /* for dynamic allocation of rings associated with this q_vector */ - struct wx_ring ring[0] ____cacheline_internodealigned_in_smp; + struct wx_ring ring[] ____cacheline_internodealigned_in_smp; }; enum wx_isb_idx { @@ -610,6 +814,9 @@ enum wx_isb_idx { }; struct wx { + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + + void *priv; u8 __iomem *hw_addr; struct pci_dev *pdev; struct net_device *netdev; @@ -642,6 +849,7 @@ struct wx { bool wol_enabled; bool ncsi_enabled; bool gpio_ctrl; + raw_spinlock_t gpio_lock; /* Tx fast path data */ int num_tx_queues; diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index df6b870aa871..c99a5d3de72e 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -115,6 +115,7 @@ static int ngbe_sw_init(struct wx *wx) wx->mac.max_rx_queues = NGBE_MAX_RX_QUEUES; wx->mac.max_tx_queues = NGBE_MAX_TX_QUEUES; wx->mac.mcft_size = NGBE_MC_TBL_SIZE; + wx->mac.vft_size = NGBE_SP_VFT_TBL_SIZE; wx->mac.rx_pb_size = NGBE_RX_PB_SIZE; wx->mac.tx_pb_size = NGBE_TDB_PB_SZ; @@ -473,9 +474,12 @@ static const struct net_device_ops ngbe_netdev_ops = { .ndo_change_mtu = wx_change_mtu, .ndo_start_xmit = wx_xmit_frame, .ndo_set_rx_mode = wx_set_rx_mode, + .ndo_set_features = wx_set_features, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = wx_set_mac, .ndo_get_stats64 = wx_get_stats64, + .ndo_vlan_rx_add_vid = wx_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = wx_vlan_rx_kill_vid, }; /** @@ -551,12 +555,18 @@ static int ngbe_probe(struct pci_dev *pdev, ngbe_set_ethtool_ops(netdev); netdev->netdev_ops = &ngbe_netdev_ops; - netdev->features |= NETIF_F_HIGHDMA; - netdev->features = NETIF_F_SG; - + netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | + NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_RXHASH | NETIF_F_RXCSUM; + netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_TSO_MANGLEID; + netdev->vlan_features |= netdev->features; + netdev->features |= NETIF_F_IPV6_CSUM | NETIF_F_VLAN_FEATURES; /* copy netdev features into list of user selectable features */ - netdev->hw_features |= netdev->features | - NETIF_F_RXALL; + netdev->hw_features |= netdev->features | NETIF_F_RXALL; + netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; + netdev->features |= NETIF_F_HIGHDMA; + netdev->hw_features |= NETIF_F_GRO; + netdev->features |= NETIF_F_GRO; netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index 373d5af628cd..b70eca397b67 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -136,6 +136,7 @@ enum NGBE_MSCA_CMD_value { #define NGBE_RAR_ENTRIES 32 #define NGBE_RX_PB_SIZE 42 #define NGBE_MC_TBL_SIZE 128 +#define NGBE_SP_VFT_TBL_SIZE 128 #define NGBE_TDB_PB_SZ (20 * 1024) /* 160KB Packet Buffer */ /* TX/RX descriptor defines */ diff --git a/drivers/net/ethernet/wangxun/txgbe/Makefile b/drivers/net/ethernet/wangxun/txgbe/Makefile index 6db14a2cb2d0..7507f762edfe 100644 --- a/drivers/net/ethernet/wangxun/txgbe/Makefile +++ b/drivers/net/ethernet/wangxun/txgbe/Makefile @@ -8,4 +8,5 @@ obj-$(CONFIG_TXGBE) += txgbe.o txgbe-objs := txgbe_main.o \ txgbe_hw.o \ + txgbe_phy.o \ txgbe_ethtool.o diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c index d914e9a05404..859da112586a 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c @@ -6,11 +6,39 @@ #include <linux/netdevice.h> #include "../libwx/wx_ethtool.h" +#include "../libwx/wx_type.h" +#include "txgbe_type.h" #include "txgbe_ethtool.h" +static int txgbe_nway_reset(struct net_device *netdev) +{ + struct txgbe *txgbe = netdev_to_txgbe(netdev); + + return phylink_ethtool_nway_reset(txgbe->phylink); +} + +static int txgbe_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct txgbe *txgbe = netdev_to_txgbe(netdev); + + return phylink_ethtool_ksettings_get(txgbe->phylink, cmd); +} + +static int txgbe_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) +{ + struct txgbe *txgbe = netdev_to_txgbe(netdev); + + return phylink_ethtool_ksettings_set(txgbe->phylink, cmd); +} + static const struct ethtool_ops txgbe_ethtool_ops = { .get_drvinfo = wx_get_drvinfo, + .nway_reset = txgbe_nway_reset, .get_link = ethtool_op_get_link, + .get_link_ksettings = txgbe_get_link_ksettings, + .set_link_ksettings = txgbe_set_link_ksettings, }; void txgbe_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c index ebc46f3be056..0772eb14eabf 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c @@ -160,34 +160,24 @@ int txgbe_read_pba_string(struct wx *wx, u8 *pba_num, u32 pba_num_size) static int txgbe_calc_eeprom_checksum(struct wx *wx, u16 *checksum) { u16 *eeprom_ptrs = NULL; - u32 buffer_size = 0; - u16 *buffer = NULL; u16 *local_buffer; int status; u16 i; wx_init_eeprom_params(wx); - if (!buffer) { - eeprom_ptrs = kvmalloc_array(TXGBE_EEPROM_LAST_WORD, sizeof(u16), - GFP_KERNEL); - if (!eeprom_ptrs) - return -ENOMEM; - /* Read pointer area */ - status = wx_read_ee_hostif_buffer(wx, 0, - TXGBE_EEPROM_LAST_WORD, - eeprom_ptrs); - if (status != 0) { - wx_err(wx, "Failed to read EEPROM image\n"); - kvfree(eeprom_ptrs); - return status; - } - local_buffer = eeprom_ptrs; - } else { - if (buffer_size < TXGBE_EEPROM_LAST_WORD) - return -EFAULT; - local_buffer = buffer; + eeprom_ptrs = kvmalloc_array(TXGBE_EEPROM_LAST_WORD, sizeof(u16), + GFP_KERNEL); + if (!eeprom_ptrs) + return -ENOMEM; + /* Read pointer area */ + status = wx_read_ee_hostif_buffer(wx, 0, TXGBE_EEPROM_LAST_WORD, eeprom_ptrs); + if (status != 0) { + wx_err(wx, "Failed to read EEPROM image\n"); + kvfree(eeprom_ptrs); + return status; } + local_buffer = eeprom_ptrs; for (i = 0; i < TXGBE_EEPROM_LAST_WORD; i++) if (i != wx->eeprom.sw_region_offset + TXGBE_EEPROM_CHECKSUM) @@ -196,9 +186,6 @@ static int txgbe_calc_eeprom_checksum(struct wx *wx, u16 *checksum) if (eeprom_ptrs) kvfree(eeprom_ptrs); - if (*checksum > TXGBE_EEPROM_SUM) - return -EINVAL; - *checksum = TXGBE_EEPROM_SUM - *checksum; return 0; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 5b8a121fb496..46eba6d6188b 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -7,6 +7,7 @@ #include <linux/netdevice.h> #include <linux/string.h> #include <linux/etherdevice.h> +#include <linux/phylink.h> #include <net/ip.h> #include <linux/if_vlan.h> @@ -15,6 +16,7 @@ #include "../libwx/wx_hw.h" #include "txgbe_type.h" #include "txgbe_hw.h" +#include "txgbe_phy.h" #include "txgbe_ethtool.h" char txgbe_driver_name[] = "txgbe"; @@ -81,6 +83,8 @@ static int txgbe_enumerate_functions(struct wx *wx) **/ static void txgbe_irq_enable(struct wx *wx, bool queues) { + wr32(wx, WX_PX_MISC_IEN, TXGBE_PX_MISC_IEN_MASK); + /* unmask interrupt */ wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); if (queues) @@ -128,17 +132,6 @@ static irqreturn_t txgbe_intr(int __always_unused irq, void *data) return IRQ_HANDLED; } -static irqreturn_t txgbe_msix_other(int __always_unused irq, void *data) -{ - struct wx *wx = data; - - /* re-enable the original interrupt state */ - if (netif_running(wx->netdev)) - txgbe_irq_enable(wx, false); - - return IRQ_HANDLED; -} - /** * txgbe_request_msix_irqs - Initialize MSI-X interrupts * @wx: board private structure @@ -170,13 +163,6 @@ static int txgbe_request_msix_irqs(struct wx *wx) } } - err = request_irq(wx->msix_entries[vector].vector, - txgbe_msix_other, 0, netdev->name, wx); - if (err) { - wx_err(wx, "request_irq for msix_other failed: %d\n", err); - goto free_queue_irqs; - } - return 0; free_queue_irqs: @@ -219,7 +205,8 @@ static int txgbe_request_irq(struct wx *wx) static void txgbe_up_complete(struct wx *wx) { - u32 reg; + struct net_device *netdev = wx->netdev; + struct txgbe *txgbe; wx_control_hw(wx, true); wx_configure_vectors(wx); @@ -228,24 +215,17 @@ static void txgbe_up_complete(struct wx *wx) smp_mb__before_atomic(); wx_napi_enable_all(wx); + txgbe = netdev_to_txgbe(netdev); + phylink_start(txgbe->phylink); + /* clear any pending interrupts, may auto mask */ rd32(wx, WX_PX_IC(0)); rd32(wx, WX_PX_IC(1)); rd32(wx, WX_PX_MISC_IC); txgbe_irq_enable(wx, true); - /* Configure MAC Rx and Tx when link is up */ - reg = rd32(wx, WX_MAC_RX_CFG); - wr32(wx, WX_MAC_RX_CFG, reg); - wr32(wx, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR); - reg = rd32(wx, WX_MAC_WDG_TIMEOUT); - wr32(wx, WX_MAC_WDG_TIMEOUT, reg); - reg = rd32(wx, WX_MAC_TX_CFG); - wr32(wx, WX_MAC_TX_CFG, (reg & ~WX_MAC_TX_CFG_SPEED_MASK) | WX_MAC_TX_CFG_SPEED_10G); - /* enable transmits */ - netif_tx_start_all_queues(wx->netdev); - netif_carrier_on(wx->netdev); + netif_tx_start_all_queues(netdev); } static void txgbe_reset(struct wx *wx) @@ -258,6 +238,7 @@ static void txgbe_reset(struct wx *wx) if (err != 0) wx_err(wx, "Hardware Error: %d\n", err); + wx_start_hw(wx); /* do not flush user set addresses */ memcpy(old_addr, &wx->mac_table[0].addr, netdev->addr_len); wx_flush_sw_mac_table(wx); @@ -279,7 +260,6 @@ static void txgbe_disable_device(struct wx *wx) wx_disable_rx_queue(wx, wx->rx_ring[i]); netif_tx_stop_all_queues(netdev); - netif_carrier_off(netdev); netif_tx_disable(netdev); wx_irq_disable(wx); @@ -310,8 +290,11 @@ static void txgbe_disable_device(struct wx *wx) static void txgbe_down(struct wx *wx) { + struct txgbe *txgbe = netdev_to_txgbe(wx->netdev); + txgbe_disable_device(wx); txgbe_reset(wx); + phylink_stop(txgbe->phylink); wx_clean_all_tx_rings(wx); wx_clean_all_rx_rings(wx); @@ -330,6 +313,7 @@ static int txgbe_sw_init(struct wx *wx) wx->mac.max_tx_queues = TXGBE_SP_MAX_TX_QUEUES; wx->mac.max_rx_queues = TXGBE_SP_MAX_RX_QUEUES; wx->mac.mcft_size = TXGBE_SP_MC_TBL_SIZE; + wx->mac.vft_size = TXGBE_SP_VFT_TBL_SIZE; wx->mac.rx_pb_size = TXGBE_SP_RX_PB_SIZE; wx->mac.tx_pb_size = TXGBE_SP_TDB_PB_SZ; @@ -455,7 +439,7 @@ static int txgbe_close(struct net_device *netdev) return 0; } -static void txgbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake) +static void txgbe_dev_shutdown(struct pci_dev *pdev) { struct wx *wx = pci_get_drvdata(pdev); struct net_device *netdev; @@ -475,12 +459,10 @@ static void txgbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake) static void txgbe_shutdown(struct pci_dev *pdev) { - bool wake; - - txgbe_dev_shutdown(pdev, &wake); + txgbe_dev_shutdown(pdev); if (system_state == SYSTEM_POWER_OFF) { - pci_wake_from_d3(pdev, wake); + pci_wake_from_d3(pdev, false); pci_set_power_state(pdev, PCI_D3hot); } } @@ -491,9 +473,12 @@ static const struct net_device_ops txgbe_netdev_ops = { .ndo_change_mtu = wx_change_mtu, .ndo_start_xmit = wx_xmit_frame, .ndo_set_rx_mode = wx_set_rx_mode, + .ndo_set_features = wx_set_features, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = wx_set_mac, .ndo_get_stats64 = wx_get_stats64, + .ndo_vlan_rx_add_vid = wx_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = wx_vlan_rx_kill_vid, }; /** @@ -513,6 +498,7 @@ static int txgbe_probe(struct pci_dev *pdev, struct net_device *netdev; int err, expected_gts; struct wx *wx = NULL; + struct txgbe *txgbe; u16 eeprom_verh = 0, eeprom_verl = 0, offset = 0; u16 eeprom_cfg_blkh = 0, eeprom_cfg_blkl = 0; @@ -596,11 +582,25 @@ static int txgbe_probe(struct pci_dev *pdev, goto err_free_mac_table; } - netdev->features |= NETIF_F_HIGHDMA; - netdev->features = NETIF_F_SG; - + netdev->features = NETIF_F_SG | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_RXHASH | + NETIF_F_RXCSUM | + NETIF_F_HW_CSUM; + + netdev->gso_partial_features = NETIF_F_GSO_ENCAP_ALL; + netdev->features |= netdev->gso_partial_features; + netdev->features |= NETIF_F_SCTP_CRC; + netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; + netdev->hw_enc_features |= netdev->vlan_features; + netdev->features |= NETIF_F_VLAN_FEATURES; /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features | NETIF_F_RXALL; + netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; + netdev->features |= NETIF_F_HIGHDMA; + netdev->hw_features |= NETIF_F_GRO; + netdev->features |= NETIF_F_GRO; netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; @@ -663,10 +663,23 @@ static int txgbe_probe(struct pci_dev *pdev, "0x%08x", etrack_id); } - err = register_netdev(netdev); + txgbe = devm_kzalloc(&pdev->dev, sizeof(*txgbe), GFP_KERNEL); + if (!txgbe) { + err = -ENOMEM; + goto err_release_hw; + } + + txgbe->wx = wx; + wx->priv = txgbe; + + err = txgbe_init_phy(txgbe); if (err) goto err_release_hw; + err = register_netdev(netdev); + if (err) + goto err_remove_phy; + pci_set_drvdata(pdev, wx); netif_tx_stop_all_queues(netdev); @@ -694,6 +707,8 @@ static int txgbe_probe(struct pci_dev *pdev, return 0; +err_remove_phy: + txgbe_remove_phy(txgbe); err_release_hw: wx_clear_interrupt_scheme(wx); wx_control_hw(wx, false); @@ -719,11 +734,14 @@ err_pci_disable_dev: static void txgbe_remove(struct pci_dev *pdev) { struct wx *wx = pci_get_drvdata(pdev); + struct txgbe *txgbe = wx->priv; struct net_device *netdev; netdev = wx->netdev; unregister_netdev(netdev); + txgbe_remove_phy(txgbe); + pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c new file mode 100644 index 000000000000..8779645a54be --- /dev/null +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -0,0 +1,673 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2015 - 2023 Beijing WangXun Technology Co., Ltd. */ + +#include <linux/gpio/machine.h> +#include <linux/gpio/driver.h> +#include <linux/gpio/property.h> +#include <linux/clk-provider.h> +#include <linux/clkdev.h> +#include <linux/i2c.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/pcs/pcs-xpcs.h> +#include <linux/phylink.h> + +#include "../libwx/wx_type.h" +#include "../libwx/wx_lib.h" +#include "../libwx/wx_hw.h" +#include "txgbe_type.h" +#include "txgbe_phy.h" + +static int txgbe_swnodes_register(struct txgbe *txgbe) +{ + struct txgbe_nodes *nodes = &txgbe->nodes; + struct pci_dev *pdev = txgbe->wx->pdev; + struct software_node *swnodes; + u32 id; + + id = (pdev->bus->number << 8) | pdev->devfn; + + snprintf(nodes->gpio_name, sizeof(nodes->gpio_name), "txgbe_gpio-%x", id); + snprintf(nodes->i2c_name, sizeof(nodes->i2c_name), "txgbe_i2c-%x", id); + snprintf(nodes->sfp_name, sizeof(nodes->sfp_name), "txgbe_sfp-%x", id); + snprintf(nodes->phylink_name, sizeof(nodes->phylink_name), "txgbe_phylink-%x", id); + + swnodes = nodes->swnodes; + + /* GPIO 0: tx fault + * GPIO 1: tx disable + * GPIO 2: sfp module absent + * GPIO 3: rx signal lost + * GPIO 4: rate select, 1G(0) 10G(1) + * GPIO 5: rate select, 1G(0) 10G(1) + */ + nodes->gpio_props[0] = PROPERTY_ENTRY_STRING("pinctrl-names", "default"); + swnodes[SWNODE_GPIO] = NODE_PROP(nodes->gpio_name, nodes->gpio_props); + nodes->gpio0_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 0, GPIO_ACTIVE_HIGH); + nodes->gpio1_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 1, GPIO_ACTIVE_HIGH); + nodes->gpio2_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 2, GPIO_ACTIVE_LOW); + nodes->gpio3_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 3, GPIO_ACTIVE_HIGH); + nodes->gpio4_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 4, GPIO_ACTIVE_HIGH); + nodes->gpio5_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 5, GPIO_ACTIVE_HIGH); + + nodes->i2c_props[0] = PROPERTY_ENTRY_STRING("compatible", "snps,designware-i2c"); + nodes->i2c_props[1] = PROPERTY_ENTRY_BOOL("wx,i2c-snps-model"); + nodes->i2c_props[2] = PROPERTY_ENTRY_U32("clock-frequency", I2C_MAX_STANDARD_MODE_FREQ); + swnodes[SWNODE_I2C] = NODE_PROP(nodes->i2c_name, nodes->i2c_props); + nodes->i2c_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_I2C]); + + nodes->sfp_props[0] = PROPERTY_ENTRY_STRING("compatible", "sff,sfp"); + nodes->sfp_props[1] = PROPERTY_ENTRY_REF_ARRAY("i2c-bus", nodes->i2c_ref); + nodes->sfp_props[2] = PROPERTY_ENTRY_REF_ARRAY("tx-fault-gpios", nodes->gpio0_ref); + nodes->sfp_props[3] = PROPERTY_ENTRY_REF_ARRAY("tx-disable-gpios", nodes->gpio1_ref); + nodes->sfp_props[4] = PROPERTY_ENTRY_REF_ARRAY("mod-def0-gpios", nodes->gpio2_ref); + nodes->sfp_props[5] = PROPERTY_ENTRY_REF_ARRAY("los-gpios", nodes->gpio3_ref); + nodes->sfp_props[6] = PROPERTY_ENTRY_REF_ARRAY("rate-select1-gpios", nodes->gpio4_ref); + nodes->sfp_props[7] = PROPERTY_ENTRY_REF_ARRAY("rate-select0-gpios", nodes->gpio5_ref); + swnodes[SWNODE_SFP] = NODE_PROP(nodes->sfp_name, nodes->sfp_props); + nodes->sfp_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_SFP]); + + nodes->phylink_props[0] = PROPERTY_ENTRY_STRING("managed", "in-band-status"); + nodes->phylink_props[1] = PROPERTY_ENTRY_REF_ARRAY("sfp", nodes->sfp_ref); + swnodes[SWNODE_PHYLINK] = NODE_PROP(nodes->phylink_name, nodes->phylink_props); + + nodes->group[SWNODE_GPIO] = &swnodes[SWNODE_GPIO]; + nodes->group[SWNODE_I2C] = &swnodes[SWNODE_I2C]; + nodes->group[SWNODE_SFP] = &swnodes[SWNODE_SFP]; + nodes->group[SWNODE_PHYLINK] = &swnodes[SWNODE_PHYLINK]; + + return software_node_register_node_group(nodes->group); +} + +static int txgbe_pcs_read(struct mii_bus *bus, int addr, int devnum, int regnum) +{ + struct wx *wx = bus->priv; + u32 offset, val; + + if (addr) + return -EOPNOTSUPP; + + offset = devnum << 16 | regnum; + + /* Set the LAN port indicator to IDA_ADDR */ + wr32(wx, TXGBE_XPCS_IDA_ADDR, offset); + + /* Read the data from IDA_DATA register */ + val = rd32(wx, TXGBE_XPCS_IDA_DATA); + + return (u16)val; +} + +static int txgbe_pcs_write(struct mii_bus *bus, int addr, int devnum, int regnum, u16 val) +{ + struct wx *wx = bus->priv; + u32 offset; + + if (addr) + return -EOPNOTSUPP; + + offset = devnum << 16 | regnum; + + /* Set the LAN port indicator to IDA_ADDR */ + wr32(wx, TXGBE_XPCS_IDA_ADDR, offset); + + /* Write the data to IDA_DATA register */ + wr32(wx, TXGBE_XPCS_IDA_DATA, val); + + return 0; +} + +static int txgbe_mdio_pcs_init(struct txgbe *txgbe) +{ + struct mii_bus *mii_bus; + struct dw_xpcs *xpcs; + struct pci_dev *pdev; + struct wx *wx; + int ret = 0; + + wx = txgbe->wx; + pdev = wx->pdev; + + mii_bus = devm_mdiobus_alloc(&pdev->dev); + if (!mii_bus) + return -ENOMEM; + + mii_bus->name = "txgbe_pcs_mdio_bus"; + mii_bus->read_c45 = &txgbe_pcs_read; + mii_bus->write_c45 = &txgbe_pcs_write; + mii_bus->parent = &pdev->dev; + mii_bus->phy_mask = ~0; + mii_bus->priv = wx; + snprintf(mii_bus->id, MII_BUS_ID_SIZE, "txgbe_pcs-%x", + (pdev->bus->number << 8) | pdev->devfn); + + ret = devm_mdiobus_register(&pdev->dev, mii_bus); + if (ret) + return ret; + + xpcs = xpcs_create_mdiodev(mii_bus, 0, PHY_INTERFACE_MODE_10GBASER); + if (IS_ERR(xpcs)) + return PTR_ERR(xpcs); + + txgbe->xpcs = xpcs; + + return 0; +} + +static struct phylink_pcs *txgbe_phylink_mac_select(struct phylink_config *config, + phy_interface_t interface) +{ + struct txgbe *txgbe = netdev_to_txgbe(to_net_dev(config->dev)); + + return &txgbe->xpcs->pcs; +} + +static void txgbe_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) +{ +} + +static void txgbe_mac_link_down(struct phylink_config *config, + unsigned int mode, phy_interface_t interface) +{ + struct wx *wx = netdev_priv(to_net_dev(config->dev)); + + wr32m(wx, WX_MAC_TX_CFG, WX_MAC_TX_CFG_TE, 0); +} + +static void txgbe_mac_link_up(struct phylink_config *config, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause) +{ + struct wx *wx = netdev_priv(to_net_dev(config->dev)); + u32 txcfg, wdg; + + txcfg = rd32(wx, WX_MAC_TX_CFG); + txcfg &= ~WX_MAC_TX_CFG_SPEED_MASK; + + switch (speed) { + case SPEED_10000: + txcfg |= WX_MAC_TX_CFG_SPEED_10G; + break; + case SPEED_1000: + case SPEED_100: + case SPEED_10: + txcfg |= WX_MAC_TX_CFG_SPEED_1G; + break; + default: + break; + } + + wr32(wx, WX_MAC_TX_CFG, txcfg | WX_MAC_TX_CFG_TE); + + /* Re configure MAC Rx */ + wr32m(wx, WX_MAC_RX_CFG, WX_MAC_RX_CFG_RE, WX_MAC_RX_CFG_RE); + wr32(wx, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR); + wdg = rd32(wx, WX_MAC_WDG_TIMEOUT); + wr32(wx, WX_MAC_WDG_TIMEOUT, wdg); +} + +static const struct phylink_mac_ops txgbe_mac_ops = { + .mac_select_pcs = txgbe_phylink_mac_select, + .mac_config = txgbe_mac_config, + .mac_link_down = txgbe_mac_link_down, + .mac_link_up = txgbe_mac_link_up, +}; + +static int txgbe_phylink_init(struct txgbe *txgbe) +{ + struct phylink_config *config; + struct fwnode_handle *fwnode; + struct wx *wx = txgbe->wx; + phy_interface_t phy_mode; + struct phylink *phylink; + + config = devm_kzalloc(&wx->pdev->dev, sizeof(*config), GFP_KERNEL); + if (!config) + return -ENOMEM; + + config->dev = &wx->netdev->dev; + config->type = PHYLINK_NETDEV; + config->mac_capabilities = MAC_10000FD | MAC_1000FD | MAC_SYM_PAUSE | MAC_ASYM_PAUSE; + phy_mode = PHY_INTERFACE_MODE_10GBASER; + __set_bit(PHY_INTERFACE_MODE_10GBASER, config->supported_interfaces); + fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_PHYLINK]); + phylink = phylink_create(config, fwnode, phy_mode, &txgbe_mac_ops); + if (IS_ERR(phylink)) + return PTR_ERR(phylink); + + txgbe->phylink = phylink; + + return 0; +} + +static int txgbe_gpio_get(struct gpio_chip *chip, unsigned int offset) +{ + struct wx *wx = gpiochip_get_data(chip); + int val; + + val = rd32m(wx, WX_GPIO_EXT, BIT(offset)); + + return !!(val & BIT(offset)); +} + +static int txgbe_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) +{ + struct wx *wx = gpiochip_get_data(chip); + u32 val; + + val = rd32(wx, WX_GPIO_DDR); + if (BIT(offset) & val) + return GPIO_LINE_DIRECTION_OUT; + + return GPIO_LINE_DIRECTION_IN; +} + +static int txgbe_gpio_direction_in(struct gpio_chip *chip, unsigned int offset) +{ + struct wx *wx = gpiochip_get_data(chip); + unsigned long flags; + + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + wr32m(wx, WX_GPIO_DDR, BIT(offset), 0); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); + + return 0; +} + +static int txgbe_gpio_direction_out(struct gpio_chip *chip, unsigned int offset, + int val) +{ + struct wx *wx = gpiochip_get_data(chip); + unsigned long flags; + u32 set; + + set = val ? BIT(offset) : 0; + + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + wr32m(wx, WX_GPIO_DR, BIT(offset), set); + wr32m(wx, WX_GPIO_DDR, BIT(offset), BIT(offset)); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); + + return 0; +} + +static void txgbe_gpio_irq_ack(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + struct wx *wx = gpiochip_get_data(gc); + unsigned long flags; + + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + wr32(wx, WX_GPIO_EOI, BIT(hwirq)); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); +} + +static void txgbe_gpio_irq_mask(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + struct wx *wx = gpiochip_get_data(gc); + unsigned long flags; + + gpiochip_disable_irq(gc, hwirq); + + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + wr32m(wx, WX_GPIO_INTMASK, BIT(hwirq), BIT(hwirq)); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); +} + +static void txgbe_gpio_irq_unmask(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + struct wx *wx = gpiochip_get_data(gc); + unsigned long flags; + + gpiochip_enable_irq(gc, hwirq); + + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + wr32m(wx, WX_GPIO_INTMASK, BIT(hwirq), 0); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); +} + +static void txgbe_toggle_trigger(struct gpio_chip *gc, unsigned int offset) +{ + struct wx *wx = gpiochip_get_data(gc); + u32 pol, val; + + pol = rd32(wx, WX_GPIO_POLARITY); + val = rd32(wx, WX_GPIO_EXT); + + if (val & BIT(offset)) + pol &= ~BIT(offset); + else + pol |= BIT(offset); + + wr32(wx, WX_GPIO_POLARITY, pol); +} + +static int txgbe_gpio_set_type(struct irq_data *d, unsigned int type) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + struct wx *wx = gpiochip_get_data(gc); + u32 level, polarity, mask; + unsigned long flags; + + mask = BIT(hwirq); + + if (type & IRQ_TYPE_LEVEL_MASK) { + level = 0; + irq_set_handler_locked(d, handle_level_irq); + } else { + level = mask; + irq_set_handler_locked(d, handle_edge_irq); + } + + if (type == IRQ_TYPE_EDGE_RISING || type == IRQ_TYPE_LEVEL_HIGH) + polarity = mask; + else + polarity = 0; + + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + + wr32m(wx, WX_GPIO_INTEN, mask, mask); + wr32m(wx, WX_GPIO_INTTYPE_LEVEL, mask, level); + if (type == IRQ_TYPE_EDGE_BOTH) + txgbe_toggle_trigger(gc, hwirq); + else + wr32m(wx, WX_GPIO_POLARITY, mask, polarity); + + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); + + return 0; +} + +static const struct irq_chip txgbe_gpio_irq_chip = { + .name = "txgbe_gpio_irq", + .irq_ack = txgbe_gpio_irq_ack, + .irq_mask = txgbe_gpio_irq_mask, + .irq_unmask = txgbe_gpio_irq_unmask, + .irq_set_type = txgbe_gpio_set_type, + .flags = IRQCHIP_IMMUTABLE, + GPIOCHIP_IRQ_RESOURCE_HELPERS, +}; + +static void txgbe_irq_handler(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct wx *wx = irq_desc_get_handler_data(desc); + struct txgbe *txgbe = wx->priv; + irq_hw_number_t hwirq; + unsigned long gpioirq; + struct gpio_chip *gc; + unsigned long flags; + u32 eicr; + + eicr = wx_misc_isb(wx, WX_ISB_MISC); + + chained_irq_enter(chip, desc); + + gpioirq = rd32(wx, WX_GPIO_INTSTATUS); + + gc = txgbe->gpio; + for_each_set_bit(hwirq, &gpioirq, gc->ngpio) { + int gpio = irq_find_mapping(gc->irq.domain, hwirq); + u32 irq_type = irq_get_trigger_type(gpio); + + generic_handle_domain_irq(gc->irq.domain, hwirq); + + if ((irq_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_BOTH) { + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + txgbe_toggle_trigger(gc, hwirq); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); + } + } + + chained_irq_exit(chip, desc); + + if (eicr & (TXGBE_PX_MISC_ETH_LK | TXGBE_PX_MISC_ETH_LKDN)) { + u32 reg = rd32(wx, TXGBE_CFG_PORT_ST); + + phylink_mac_change(txgbe->phylink, !!(reg & TXGBE_CFG_PORT_ST_LINK_UP)); + } + + /* unmask interrupt */ + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); +} + +static int txgbe_gpio_init(struct txgbe *txgbe) +{ + struct gpio_irq_chip *girq; + struct gpio_chip *gc; + struct device *dev; + struct wx *wx; + int ret; + + wx = txgbe->wx; + dev = &wx->pdev->dev; + + raw_spin_lock_init(&wx->gpio_lock); + + gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL); + if (!gc) + return -ENOMEM; + + gc->label = devm_kasprintf(dev, GFP_KERNEL, "txgbe_gpio-%x", + (wx->pdev->bus->number << 8) | wx->pdev->devfn); + if (!gc->label) + return -ENOMEM; + + gc->base = -1; + gc->ngpio = 6; + gc->owner = THIS_MODULE; + gc->parent = dev; + gc->fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_GPIO]); + gc->get = txgbe_gpio_get; + gc->get_direction = txgbe_gpio_get_direction; + gc->direction_input = txgbe_gpio_direction_in; + gc->direction_output = txgbe_gpio_direction_out; + + girq = &gc->irq; + gpio_irq_chip_set_chip(girq, &txgbe_gpio_irq_chip); + girq->parent_handler = txgbe_irq_handler; + girq->parent_handler_data = wx; + girq->num_parents = 1; + girq->parents = devm_kcalloc(dev, girq->num_parents, + sizeof(*girq->parents), GFP_KERNEL); + if (!girq->parents) + return -ENOMEM; + girq->parents[0] = wx->msix_entries[wx->num_q_vectors].vector; + girq->default_type = IRQ_TYPE_NONE; + girq->handler = handle_bad_irq; + + ret = devm_gpiochip_add_data(dev, gc, wx); + if (ret) + return ret; + + txgbe->gpio = gc; + + return 0; +} + +static int txgbe_clock_register(struct txgbe *txgbe) +{ + struct pci_dev *pdev = txgbe->wx->pdev; + struct clk_lookup *clock; + char clk_name[32]; + struct clk *clk; + + snprintf(clk_name, sizeof(clk_name), "i2c_designware.%d", + (pdev->bus->number << 8) | pdev->devfn); + + clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + clock = clkdev_create(clk, NULL, clk_name); + if (!clock) { + clk_unregister(clk); + return -ENOMEM; + } + + txgbe->clk = clk; + txgbe->clock = clock; + + return 0; +} + +static int txgbe_i2c_read(void *context, unsigned int reg, unsigned int *val) +{ + struct wx *wx = context; + + *val = rd32(wx, reg + TXGBE_I2C_BASE); + + return 0; +} + +static int txgbe_i2c_write(void *context, unsigned int reg, unsigned int val) +{ + struct wx *wx = context; + + wr32(wx, reg + TXGBE_I2C_BASE, val); + + return 0; +} + +static const struct regmap_config i2c_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_read = txgbe_i2c_read, + .reg_write = txgbe_i2c_write, + .fast_io = true, +}; + +static int txgbe_i2c_register(struct txgbe *txgbe) +{ + struct platform_device_info info = {}; + struct platform_device *i2c_dev; + struct regmap *i2c_regmap; + struct pci_dev *pdev; + struct wx *wx; + + wx = txgbe->wx; + pdev = wx->pdev; + i2c_regmap = devm_regmap_init(&pdev->dev, NULL, wx, &i2c_regmap_config); + if (IS_ERR(i2c_regmap)) { + wx_err(wx, "failed to init I2C regmap\n"); + return PTR_ERR(i2c_regmap); + } + + info.parent = &pdev->dev; + info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_I2C]); + info.name = "i2c_designware"; + info.id = (pdev->bus->number << 8) | pdev->devfn; + + info.res = &DEFINE_RES_IRQ(pdev->irq); + info.num_res = 1; + i2c_dev = platform_device_register_full(&info); + if (IS_ERR(i2c_dev)) + return PTR_ERR(i2c_dev); + + txgbe->i2c_dev = i2c_dev; + + return 0; +} + +static int txgbe_sfp_register(struct txgbe *txgbe) +{ + struct pci_dev *pdev = txgbe->wx->pdev; + struct platform_device_info info = {}; + struct platform_device *sfp_dev; + + info.parent = &pdev->dev; + info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_SFP]); + info.name = "sfp"; + info.id = (pdev->bus->number << 8) | pdev->devfn; + sfp_dev = platform_device_register_full(&info); + if (IS_ERR(sfp_dev)) + return PTR_ERR(sfp_dev); + + txgbe->sfp_dev = sfp_dev; + + return 0; +} + +int txgbe_init_phy(struct txgbe *txgbe) +{ + int ret; + + ret = txgbe_swnodes_register(txgbe); + if (ret) { + wx_err(txgbe->wx, "failed to register software nodes\n"); + return ret; + } + + ret = txgbe_mdio_pcs_init(txgbe); + if (ret) { + wx_err(txgbe->wx, "failed to init mdio pcs: %d\n", ret); + goto err_unregister_swnode; + } + + ret = txgbe_phylink_init(txgbe); + if (ret) { + wx_err(txgbe->wx, "failed to init phylink\n"); + goto err_destroy_xpcs; + } + + ret = txgbe_gpio_init(txgbe); + if (ret) { + wx_err(txgbe->wx, "failed to init gpio\n"); + goto err_destroy_phylink; + } + + ret = txgbe_clock_register(txgbe); + if (ret) { + wx_err(txgbe->wx, "failed to register clock: %d\n", ret); + goto err_destroy_phylink; + } + + ret = txgbe_i2c_register(txgbe); + if (ret) { + wx_err(txgbe->wx, "failed to init i2c interface: %d\n", ret); + goto err_unregister_clk; + } + + ret = txgbe_sfp_register(txgbe); + if (ret) { + wx_err(txgbe->wx, "failed to register sfp\n"); + goto err_unregister_i2c; + } + + return 0; + +err_unregister_i2c: + platform_device_unregister(txgbe->i2c_dev); +err_unregister_clk: + clkdev_drop(txgbe->clock); + clk_unregister(txgbe->clk); +err_destroy_phylink: + phylink_destroy(txgbe->phylink); +err_destroy_xpcs: + xpcs_destroy(txgbe->xpcs); +err_unregister_swnode: + software_node_unregister_node_group(txgbe->nodes.group); + + return ret; +} + +void txgbe_remove_phy(struct txgbe *txgbe) +{ + platform_device_unregister(txgbe->sfp_dev); + platform_device_unregister(txgbe->i2c_dev); + clkdev_drop(txgbe->clock); + clk_unregister(txgbe->clk); + phylink_destroy(txgbe->phylink); + xpcs_destroy(txgbe->xpcs); + software_node_unregister_node_group(txgbe->nodes.group); +} diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h new file mode 100644 index 000000000000..1ab592124986 --- /dev/null +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2015 - 2023 Beijing WangXun Technology Co., Ltd. */ + +#ifndef _TXGBE_PHY_H_ +#define _TXGBE_PHY_H_ + +int txgbe_init_phy(struct txgbe *txgbe); +void txgbe_remove_phy(struct txgbe *txgbe); + +#endif /* _TXGBE_NODE_H_ */ diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 63a1c733718d..51199c355f95 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -4,6 +4,8 @@ #ifndef _TXGBE_TYPE_H_ #define _TXGBE_TYPE_H_ +#include <linux/property.h> + /* Device IDs */ #define TXGBE_DEV_ID_SP1000 0x1001 #define TXGBE_DEV_ID_WX1820 0x2001 @@ -53,6 +55,39 @@ #define TXGBE_TS_CTL 0x10300 #define TXGBE_TS_CTL_EVAL_MD BIT(31) +/* GPIO register bit */ +#define TXGBE_GPIOBIT_0 BIT(0) /* I:tx fault */ +#define TXGBE_GPIOBIT_1 BIT(1) /* O:tx disabled */ +#define TXGBE_GPIOBIT_2 BIT(2) /* I:sfp module absent */ +#define TXGBE_GPIOBIT_3 BIT(3) /* I:rx signal lost */ +#define TXGBE_GPIOBIT_4 BIT(4) /* O:rate select, 1G(0) 10G(1) */ +#define TXGBE_GPIOBIT_5 BIT(5) /* O:rate select, 1G(0) 10G(1) */ + +/* Extended Interrupt Enable Set */ +#define TXGBE_PX_MISC_ETH_LKDN BIT(8) +#define TXGBE_PX_MISC_DEV_RST BIT(10) +#define TXGBE_PX_MISC_ETH_EVENT BIT(17) +#define TXGBE_PX_MISC_ETH_LK BIT(18) +#define TXGBE_PX_MISC_ETH_AN BIT(19) +#define TXGBE_PX_MISC_INT_ERR BIT(20) +#define TXGBE_PX_MISC_GPIO BIT(26) +#define TXGBE_PX_MISC_IEN_MASK \ + (TXGBE_PX_MISC_ETH_LKDN | TXGBE_PX_MISC_DEV_RST | \ + TXGBE_PX_MISC_ETH_EVENT | TXGBE_PX_MISC_ETH_LK | \ + TXGBE_PX_MISC_ETH_AN | TXGBE_PX_MISC_INT_ERR | \ + TXGBE_PX_MISC_GPIO) + +/* Port cfg registers */ +#define TXGBE_CFG_PORT_ST 0x14404 +#define TXGBE_CFG_PORT_ST_LINK_UP BIT(0) + +/* I2C registers */ +#define TXGBE_I2C_BASE 0x14900 + +/************************************** ETH PHY ******************************/ +#define TXGBE_XPCS_IDA_ADDR 0x13000 +#define TXGBE_XPCS_IDA_DATA 0x13004 + /* Part Number String Length */ #define TXGBE_PBANUM_LENGTH 32 @@ -77,6 +112,7 @@ #define TXGBE_SP_MAX_RX_QUEUES 128 #define TXGBE_SP_RAR_ENTRIES 128 #define TXGBE_SP_MC_TBL_SIZE 128 +#define TXGBE_SP_VFT_TBL_SIZE 128 #define TXGBE_SP_RX_PB_SIZE 512 #define TXGBE_SP_TDB_PB_SZ (160 * 1024) /* 160KB Packet Buffer */ @@ -99,4 +135,58 @@ extern char txgbe_driver_name[]; +static inline struct txgbe *netdev_to_txgbe(struct net_device *netdev) +{ + struct wx *wx = netdev_priv(netdev); + + return wx->priv; +} + +#define NODE_PROP(_NAME, _PROP) \ + (const struct software_node) { \ + .name = _NAME, \ + .properties = _PROP, \ + } + +enum txgbe_swnodes { + SWNODE_GPIO = 0, + SWNODE_I2C, + SWNODE_SFP, + SWNODE_PHYLINK, + SWNODE_MAX +}; + +struct txgbe_nodes { + char gpio_name[32]; + char i2c_name[32]; + char sfp_name[32]; + char phylink_name[32]; + struct property_entry gpio_props[1]; + struct property_entry i2c_props[3]; + struct property_entry sfp_props[8]; + struct property_entry phylink_props[2]; + struct software_node_ref_args i2c_ref[1]; + struct software_node_ref_args gpio0_ref[1]; + struct software_node_ref_args gpio1_ref[1]; + struct software_node_ref_args gpio2_ref[1]; + struct software_node_ref_args gpio3_ref[1]; + struct software_node_ref_args gpio4_ref[1]; + struct software_node_ref_args gpio5_ref[1]; + struct software_node_ref_args sfp_ref[1]; + struct software_node swnodes[SWNODE_MAX]; + const struct software_node *group[SWNODE_MAX + 1]; +}; + +struct txgbe { + struct wx *wx; + struct txgbe_nodes nodes; + struct dw_xpcs *xpcs; + struct phylink *phylink; + struct platform_device *sfp_dev; + struct platform_device *i2c_dev; + struct clk_lookup *clock; + struct clk *clk; + struct gpio_chip *gpio; +}; + #endif /* _TXGBE_TYPE_H_ */ diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index e0ac1bcd9925..49f303353ecb 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1567,12 +1567,16 @@ static int temac_probe(struct platform_device *pdev) } /* Error handle returned DMA RX and TX interrupts */ - if (lp->rx_irq < 0) - return dev_err_probe(&pdev->dev, lp->rx_irq, + if (lp->rx_irq <= 0) { + rc = lp->rx_irq ?: -EINVAL; + return dev_err_probe(&pdev->dev, rc, "could not get DMA RX irq\n"); - if (lp->tx_irq < 0) - return dev_err_probe(&pdev->dev, lp->tx_irq, + } + if (lp->tx_irq <= 0) { + rc = lp->tx_irq ?: -EINVAL; + return dev_err_probe(&pdev->dev, rc, "could not get DMA TX irq\n"); + } if (temac_np) { /* Retrieve the MAC address */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 3e310b55bce2..8e32dc50a408 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1631,7 +1631,7 @@ static void axienet_pcs_an_restart(struct phylink_pcs *pcs) phylink_mii_c22_pcs_an_restart(pcs_phy); } -static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int mode, +static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -1653,7 +1653,8 @@ static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int mode, } } - ret = phylink_mii_c22_pcs_config(pcs_phy, mode, interface, advertising); + ret = phylink_mii_c22_pcs_config(pcs_phy, interface, advertising, + neg_mode); if (ret < 0) netdev_warn(ndev, "Failed to configure PCS: %d\n", ret); @@ -2042,6 +2043,11 @@ static int axienet_probe(struct platform_device *pdev) goto cleanup_clk; } + /* Reset core now that clocks are enabled, prior to accessing MDIO */ + ret = __axienet_device_reset(lp); + if (ret) + goto cleanup_clk; + /* Autodetect the need for 64-bit DMA pointers. * When the IP is configured for a bus width bigger than 32 bits, * writing the MSB registers is mandatory, even if they are all 0. @@ -2096,11 +2102,6 @@ static int axienet_probe(struct platform_device *pdev) lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; lp->coalesce_usec_tx = XAXIDMA_DFT_TX_USEC; - /* Reset core now that clocks are enabled, prior to accessing MDIO */ - ret = __axienet_device_reset(lp); - if (ret) - goto cleanup_clk; - ret = axienet_mdio_setup(lp); if (ret) dev_warn(&pdev->dev, @@ -2129,6 +2130,7 @@ static int axienet_probe(struct platform_device *pdev) } of_node_put(np); lp->pcs.ops = &axienet_pcs_ops; + lp->pcs.neg_mode = true; lp->pcs.poll = true; } |