diff options
49 files changed, 2451 insertions, 356 deletions
diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index fda00aac0d72..b038410eccb6 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -439,12 +439,12 @@ The possible values in this file are: - System is protected by retpoline * - BHI: BHI_DIS_S - System is protected by BHI_DIS_S - * - BHI: SW loop; KVM SW loop + * - BHI: SW loop, KVM SW loop - System is protected by software clearing sequence - * - BHI: Syscall hardening - - Syscalls are hardened against BHI - * - BHI: Syscall hardening; KVM: SW loop - - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence + * - BHI: Vulnerable + - System is vulnerable to BHI + * - BHI: Vulnerable, KVM: SW loop + - System is vulnerable; KVM is protected by software clearing sequence Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will @@ -711,18 +711,14 @@ For user space mitigation: spectre_bhi= [X86] Control mitigation of Branch History Injection - (BHI) vulnerability. Syscalls are hardened against BHI - regardless of this setting. This setting affects the deployment + (BHI) vulnerability. This setting affects the deployment of the HW BHI control and the SW BHB clearing sequence. on - unconditionally enable. + (default) Enable the HW or SW mitigation as + needed. off - unconditionally disable. - auto - enable if hardware mitigation - control(BHI_DIS_S) is available, otherwise - enable alternate mitigation in KVM. + Disable the mitigation. 
For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 25ec9883c113..9fb22e34d09a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3093,6 +3093,7 @@ reg_file_data_sampling=off [X86] retbleed=off [X86] spec_store_bypass_disable=off [X86,PPC] + spectre_bhi=off [X86] spectre_v2_user=off [X86] ssbd=force-off [ARM64] tsx_async_abort=off [X86] @@ -5405,16 +5406,13 @@ See Documentation/admin-guide/laptops/sonypi.rst spectre_bhi= [X86] Control mitigation of Branch History Injection - (BHI) vulnerability. Syscalls are hardened against BHI - reglardless of this setting. This setting affects the + (BHI) vulnerability. This setting affects the deployment of the HW BHI control and the SW BHB clearing sequence. - on - unconditionally enable. - off - unconditionally disable. - auto - (default) enable hardware mitigation - (BHI_DIS_S) if available, otherwise enable - alternate mitigation in KVM. + on - (default) Enable the HW or SW mitigation + as needed. + off - Disable the mitigation. spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 155 +SUBLEVEL = 156 EXTRAVERSION = NAME = Trick or Treat diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1f2e53b8650b..6e83acf4be42 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2511,31 +2511,16 @@ config MITIGATION_RFDS stored in floating point, vector and integer registers. See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst> -choice - prompt "Clear branch history" +config MITIGATION_SPECTRE_BHI + bool "Mitigate Spectre-BHB (Branch History Injection)" depends on CPU_SUP_INTEL - default SPECTRE_BHI_ON + default y help Enable BHI mitigations. 
BHI attacks are a form of Spectre V2 attacks where the branch history buffer is poisoned to speculatively steer indirect branches. See <file:Documentation/admin-guide/hw-vuln/spectre.rst> -config SPECTRE_BHI_ON - bool "on" - help - Equivalent to setting spectre_bhi=on command line parameter. -config SPECTRE_BHI_OFF - bool "off" - help - Equivalent to setting spectre_bhi=off command line parameter. -config SPECTRE_BHI_AUTO - bool "auto" - help - Equivalent to setting spectre_bhi=auto command line parameter. - -endchoice - endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 81d5e0a1f48c..e55fc25da2da 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1649,6 +1649,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) while (++i < cpuc->n_events) { cpuc->event_list[i-1] = cpuc->event_list[i]; cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; + cpuc->assign[i-1] = cpuc->assign[i]; } cpuc->event_constraint[i-1] = NULL; --cpuc->n_events; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 48067af94678..d2db8f4fa179 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -12,6 +12,7 @@ #include <asm/mpspec.h> #include <asm/msr.h> #include <asm/hardirq.h> +#include <asm/io.h> #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -111,7 +112,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) static inline u32 native_apic_mem_read(u32 reg) { - return *((volatile u32 *)(APIC_BASE + reg)); + return readl((void __iomem *)(APIC_BASE + reg)); } extern void native_apic_wait_icr_idle(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 50fdd6ca3b78..b30b32b288dd 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -60,6 +60,8 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; EXPORT_SYMBOL_GPL(x86_pred_cmd); +static u64 __ro_after_init x86_arch_cap_msr; + static 
DEFINE_MUTEX(spec_ctrl_mutex); void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; @@ -143,6 +145,8 @@ void __init cpu_select_mitigations(void) x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; } + x86_arch_cap_msr = x86_read_arch_cap_msr(); + /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -307,8 +311,6 @@ static const char * const taa_strings[] = { static void __init taa_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_TAA)) { taa_mitigation = TAA_MITIGATION_OFF; return; @@ -347,9 +349,8 @@ static void __init taa_select_mitigation(void) * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode * update is required. */ - ia32_cap = x86_read_arch_cap_msr(); - if ( (ia32_cap & ARCH_CAP_MDS_NO) && - !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) && + !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)) taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; /* @@ -407,8 +408,6 @@ static const char * const mmio_strings[] = { static void __init mmio_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || cpu_mitigations_off()) { @@ -419,8 +418,6 @@ static void __init mmio_select_mitigation(void) if (mmio_mitigation == MMIO_MITIGATION_OFF) return; - ia32_cap = x86_read_arch_cap_msr(); - /* * Enable CPU buffer clear mitigation for host and VMM, if also affected * by MDS or TAA. Otherwise, enable mitigation for VMM only. @@ -443,7 +440,7 @@ static void __init mmio_select_mitigation(void) * be propagated to uncore buffers, clearing the Fill buffers on idle * is required irrespective of SMT state. 
*/ - if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) static_branch_enable(&mds_idle_clear); /* @@ -453,10 +450,10 @@ static void __init mmio_select_mitigation(void) * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS * affected systems. */ - if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) || (boot_cpu_has(X86_FEATURE_MD_CLEAR) && boot_cpu_has(X86_FEATURE_FLUSH_L1D) && - !(ia32_cap & ARCH_CAP_MDS_NO))) + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO))) mmio_mitigation = MMIO_MITIGATION_VERW; else mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; @@ -514,7 +511,7 @@ static void __init rfds_select_mitigation(void) if (rfds_mitigation == RFDS_MITIGATION_OFF) return; - if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; @@ -658,8 +655,6 @@ void update_srbds_msr(void) static void __init srbds_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_SRBDS)) return; @@ -668,8 +663,7 @@ static void __init srbds_select_mitigation(void) * are only exposed to SRBDS when TSX is enabled or when CPU is affected * by Processor MMIO Stale Data vulnerability. 
*/ - ia32_cap = x86_read_arch_cap_msr(); - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) @@ -812,7 +806,7 @@ static void __init gds_select_mitigation(void) /* Will verify below that mitigation _can_ be disabled */ /* No microcode */ - if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) { + if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) { if (gds_mitigation == GDS_MITIGATION_FORCE) { /* * This only needs to be done on the boot CPU so do it @@ -1521,20 +1515,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +static bool __ro_after_init rrsba_disabled; + /* Disable in-kernel use of non-RSB RET predictors */ static void __init spec_ctrl_disable_kernel_rrsba(void) { - u64 ia32_cap; + if (rrsba_disabled) + return; - if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) { + rrsba_disabled = true; return; + } - ia32_cap = x86_read_arch_cap_msr(); + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; - if (ia32_cap & ARCH_CAP_RRSBA) { - x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - update_spec_ctrl(x86_spec_ctrl_base); - } + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + rrsba_disabled = true; } static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) @@ -1603,13 +1602,10 @@ static bool __init spec_ctrl_bhi_dis(void) enum bhi_mitigations { BHI_MITIGATION_OFF, BHI_MITIGATION_ON, - BHI_MITIGATION_AUTO, }; static enum bhi_mitigations bhi_mitigation __ro_after_init = - IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : - IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF : - BHI_MITIGATION_AUTO; + IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? 
BHI_MITIGATION_ON : BHI_MITIGATION_OFF; static int __init spectre_bhi_parse_cmdline(char *str) { @@ -1620,8 +1616,6 @@ static int __init spectre_bhi_parse_cmdline(char *str) bhi_mitigation = BHI_MITIGATION_OFF; else if (!strcmp(str, "on")) bhi_mitigation = BHI_MITIGATION_ON; - else if (!strcmp(str, "auto")) - bhi_mitigation = BHI_MITIGATION_AUTO; else pr_err("Ignoring unknown spectre_bhi option (%s)", str); @@ -1635,9 +1629,11 @@ static void __init bhi_select_mitigation(void) return; /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && - !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) - return; + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { + spec_ctrl_disable_kernel_rrsba(); + if (rrsba_disabled) + return; + } if (spec_ctrl_bhi_dis()) return; @@ -1649,9 +1645,6 @@ static void __init bhi_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); - if (bhi_mitigation == BHI_MITIGATION_AUTO) - return; - /* Mitigate syscalls when the mitigation is forced =on */ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); @@ -1884,8 +1877,6 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { - u64 ia32_cap = x86_read_arch_cap_msr(); - /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. 
@@ -1900,7 +1891,7 @@ static void update_mds_branch_idle(void) if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || - (ia32_cap & ARCH_CAP_FBSDP_NO)) { + (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); } } @@ -2788,7 +2779,7 @@ static char *pbrsb_eibrs_state(void) } } -static const char * const spectre_bhi_state(void) +static const char *spectre_bhi_state(void) { if (!boot_cpu_has_bug(X86_BUG_BHI)) return "; BHI: Not affected"; @@ -2796,13 +2787,12 @@ static const char * const spectre_bhi_state(void) return "; BHI: BHI_DIS_S"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) return "; BHI: SW loop, KVM: SW loop"; - else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && - !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && rrsba_disabled) return "; BHI: Retpoline"; - else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) - return "; BHI: Syscall hardening, KVM: SW loop"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + return "; BHI: Vulnerable, KVM: SW loop"; - return "; BHI: Vulnerable (Syscall hardening enabled)"; + return "; BHI: Vulnerable"; } static ssize_t spectre_v2_show_state(char *buf) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 664562389665..809e12f130d8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1198,25 +1198,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi u64 x86_read_arch_cap_msr(void) { - u64 ia32_cap = 0; + u64 x86_arch_cap_msr = 0; if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr); - return ia32_cap; + return x86_arch_cap_msr; } -static bool arch_cap_mmio_immune(u64 ia32_cap) +static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr) { - return (ia32_cap & ARCH_CAP_FBSDP_NO && - 
ia32_cap & ARCH_CAP_PSDP_NO && - ia32_cap & ARCH_CAP_SBDR_SSDP_NO); + return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO && + x86_arch_cap_msr & ARCH_CAP_PSDP_NO && + x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO); } -static bool __init vulnerable_to_rfds(u64 ia32_cap) +static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) { /* The "immunity" bit trumps everything else: */ - if (ia32_cap & ARCH_CAP_RFDS_NO) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO) return false; /* @@ -1224,7 +1224,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) * indicate that mitigation is needed because guest is running on a * vulnerable hardware or may migrate to such hardware: */ - if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) return true; /* Only consult the blacklist when there is no enumeration: */ @@ -1233,11 +1233,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { - u64 ia32_cap = x86_read_arch_cap_msr(); + u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && - !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO)) setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) @@ -1249,7 +1249,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); @@ -1257,15 +1257,15 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature * flag and protect from vendor-specific bugs via the whitelist. 
*/ - if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { + if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && - !(ia32_cap & ARCH_CAP_PBRSB_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); } if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && - !(ia32_cap & ARCH_CAP_MDS_NO)) { + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); @@ -1284,9 +1284,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * TSX_CTRL check alone is not sufficient for cases when the microcode * update is not present or running as guest that don't get TSX_CTRL. */ - if (!(ia32_cap & ARCH_CAP_TAA_NO) && + if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) && (cpu_has(c, X86_FEATURE_RTM) || - (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))) setup_force_cpu_bug(X86_BUG_TAA); /* @@ -1312,7 +1312,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ - if (!arch_cap_mmio_immune(ia32_cap)) { + if (!arch_cap_mmio_immune(x86_arch_cap_msr)) { if (cpu_matches(cpu_vuln_blacklist, MMIO)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) @@ -1320,7 +1320,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { - if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA)) setup_force_cpu_bug(X86_BUG_RETBLEED); } @@ -1333,7 +1333,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * disabling AVX2. 
The only way to do this in HW is to clear XCR0[2], * which means that AVX will be disabled. */ - if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) && + if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) && boot_cpu_has(X86_FEATURE_AVX)) setup_force_cpu_bug(X86_BUG_GDS); @@ -1342,11 +1342,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SRSO); } - if (vulnerable_to_rfds(ia32_cap)) + if (vulnerable_to_rfds(x86_arch_cap_msr)) setup_force_cpu_bug(X86_BUG_RFDS); /* When virtualized, eIBRS could be hidden, assume vulnerable */ - if (!(ia32_cap & ARCH_CAP_BHI_NO) && + if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && !cpu_matches(cpu_vuln_whitelist, NO_BHI) && (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || boot_cpu_has(X86_FEATURE_HYPERVISOR))) @@ -1356,7 +1356,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) return; /* Rogue Data Cache Load? No! */ - if (ia32_cap & ARCH_CAP_RDCL_NO) + if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO) return; setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 447ea279e691..957b6dd0751a 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -775,6 +775,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, unsigned int total_modes_count = 0; struct drm_client_offset *offsets; unsigned int connector_count = 0; + /* points to modes protected by mode_config.mutex */ struct drm_display_mode **modes; struct drm_crtc **crtcs; int i, ret = 0; @@ -843,7 +844,6 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, drm_client_pick_crtcs(client, connectors, connector_count, crtcs, modes, 0, width, height); } - mutex_unlock(&dev->mode_config.mutex); drm_client_modeset_release(client); @@ -873,6 +873,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned 
int width, modeset->y = offset->y; } } + mutex_unlock(&dev->mode_config.mutex); mutex_unlock(&client->modeset_mutex); out: diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 745ffa7572e8..75defafb7901 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2000,7 +2000,7 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state) &new_cdclk_state->actual)) return; - if (pipe == INVALID_PIPE || + if (new_cdclk_state->disable_pipes || old_cdclk_state->actual.cdclk <= new_cdclk_state->actual.cdclk) { drm_WARN_ON(&dev_priv->drm, !new_cdclk_state->base.changed); @@ -2029,7 +2029,7 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state) &new_cdclk_state->actual)) return; - if (pipe != INVALID_PIPE && + if (!new_cdclk_state->disable_pipes && old_cdclk_state->actual.cdclk > new_cdclk_state->actual.cdclk) { drm_WARN_ON(&dev_priv->drm, !new_cdclk_state->base.changed); @@ -2456,6 +2456,7 @@ static struct intel_global_state *intel_cdclk_duplicate_state(struct intel_globa return NULL; cdclk_state->pipe = INVALID_PIPE; + cdclk_state->disable_pipes = false; return &cdclk_state->base; } @@ -2575,6 +2576,8 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) if (ret) return ret; + new_cdclk_state->disable_pipes = true; + drm_dbg_kms(&dev_priv->drm, "Modeset required for cdclk change\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index b34eb00fb327..42376b5b3f53 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -52,6 +52,9 @@ struct intel_cdclk_state { /* bitmask of active pipes */ u8 active_pipes; + + /* update cdclk with pipes disabled */ + bool disable_pipes; }; int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c 
b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c index 4bf486b57101..cb05f7f48a98 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c @@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name) return ERR_PTR(-EINVAL); } +static void of_fini(void *p) +{ + kfree(p); +} + const struct nvbios_source nvbios_of = { .name = "OpenFirmware", .init = of_init, - .fini = (void(*)(void *))kfree, + .fini = of_fini, .read = of_read, .size = of_size, .rw = false, diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index b19f2f00b215..d4f26075383d 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -58,16 +58,56 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr, signed long timeout) { struct qxl_device *qdev; + struct qxl_release *release; + int count = 0, sc = 0; + bool have_drawable_releases; unsigned long cur, end = jiffies + timeout; qdev = container_of(fence->lock, struct qxl_device, release_lock); + release = container_of(fence, struct qxl_release, base); + have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE; - if (!wait_event_timeout(qdev->release_event, - (dma_fence_is_signaled(fence) || - (qxl_io_notify_oom(qdev), 0)), - timeout)) - return 0; +retry: + sc++; + + if (dma_fence_is_signaled(fence)) + goto signaled; + + qxl_io_notify_oom(qdev); + + for (count = 0; count < 11; count++) { + if (!qxl_queue_garbage_collect(qdev, true)) + break; + + if (dma_fence_is_signaled(fence)) + goto signaled; + } + + if (dma_fence_is_signaled(fence)) + goto signaled; + + if (have_drawable_releases || sc < 4) { + if (sc > 2) + /* back off */ + usleep_range(500, 1000); + + if (time_after(jiffies, end)) + return 0; + + if (have_drawable_releases && sc > 300) { + DMA_FENCE_WARN(fence, + "failed to wait on release %llu after spincount %d\n", + fence->context & ~0xf0000000, sc); + goto signaled; + } + goto retry; + } + 
/* + * yeah, original sync_obj_wait gave up after 3 spins when + * have_drawable_releases is not set. + */ +signaled: cur = jiffies; if (time_after(cur, end)) return 0; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 3a9468b1d2c3..a96c9a15c9fe 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -88,7 +88,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) struct page *pages; int irq, ret; - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); + pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); if (!pages) { pr_warn("IOMMU: %s: Failed to allocate page request queue\n", iommu->name); diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c index 99ede1417d72..01ff1329e01c 100644 --- a/drivers/media/cec/core/cec-adap.c +++ b/drivers/media/cec/core/cec-adap.c @@ -1117,20 +1117,6 @@ void cec_received_msg_ts(struct cec_adapter *adap, if (valid_la && min_len) { /* These messages have special length requirements */ switch (cmd) { - case CEC_MSG_TIMER_STATUS: - if (msg->msg[2] & 0x10) { - switch (msg->msg[2] & 0xf) { - case CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE: - case CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE: - if (msg->len < 5) - valid_la = false; - break; - } - } else if ((msg->msg[2] & 0xf) == CEC_OP_PROG_ERROR_DUPLICATE) { - if (msg->len < 5) - valid_la = false; - } - break; case CEC_MSG_RECORD_ON: switch (msg->msg[2]) { case CEC_OP_RECORD_SRC_OWN: diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 14c47e614d33..f291d1e70f80 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -994,20 +994,173 @@ unlock_exit: mutex_unlock(&priv->reg_mutex); } -/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std - * 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA - * must only be propagated to C-VLAN and MAC Bridge components. That means - * VLAN-aware and VLAN-unaware bridges.
On the switch designs with CPU ports, - * these frames are supposed to be processed by the CPU (software). So we make - * the switch only forward them to the CPU port. And if received from a CPU - * port, forward to a single port. The software is responsible of making the - * switch conform to the latter by setting a single port as destination port on - * the special tag. +/* In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL) + * of the Open Systems Interconnection basic reference model (OSI/RM) are + * described; the medium access control (MAC) and logical link control (LLC) + * sublayers. The MAC sublayer is the one facing the physical layer. * - * This switch intellectual property cannot conform to this part of the standard - * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC - * DAs, it also includes :22-FF which the scope of propagation is not supposed - * to be restricted for these MAC DAs. + * In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A + * Bridge component comprises a MAC Relay Entity for interconnecting the Ports + * of the Bridge, at least two Ports, and higher layer entities with at least a + * Spanning Tree Protocol Entity included. + * + * Each Bridge Port also functions as an end station and shall provide the MAC + * Service to an LLC Entity. Each instance of the MAC Service is provided to a + * distinct LLC Entity that supports protocol identification, multiplexing, and + * demultiplexing, for protocol data unit (PDU) transmission and reception by + * one or more higher layer entities. + * + * It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC + * Entity associated with each Bridge Port is modeled as being directly + * connected to the attached Local Area Network (LAN). 
+ * + * On the switch with CPU port architecture, CPU port functions as Management + * Port, and the Management Port functionality is provided by software which + * functions as an end station. Software is connected to an IEEE 802 LAN that is + * wholly contained within the system that incorporates the Bridge. Software + * provides access to the LLC Entity associated with each Bridge Port by the + * value of the source port field on the special tag on the frame received by + * software. + * + * We call frames that carry control information to determine the active + * topology and current extent of each Virtual Local Area Network (VLAN), i.e., + * spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration + * Protocol Data Units (MVRPDUs), and frames from other link constrained + * protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and + * Link Layer Discovery Protocol (LLDP), link-local frames. They are not + * forwarded by a Bridge. Permanently configured entries in the filtering + * database (FDB) ensure that such frames are discarded by the Forwarding + * Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail: + * + * Each of the reserved MAC addresses specified in Table 8-1 + * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be + * permanently configured in the FDB in C-VLAN components and ERs. + * + * Each of the reserved MAC addresses specified in Table 8-2 + * (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently + * configured in the FDB in S-VLAN components. + * + * Each of the reserved MAC addresses specified in Table 8-3 + * (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in + * TPMR components. + * + * The FDB entries for reserved MAC addresses shall specify filtering for all + * Bridge Ports and all VIDs. Management shall not provide the capability to + * modify or remove entries for reserved MAC addresses. 
+ * + * The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of + * propagation of PDUs within a Bridged Network, as follows: + * + * The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no + * conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN) + * component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward. + * PDUs transmitted using this destination address, or any other addresses + * that appear in Table 8-1, Table 8-2, and Table 8-3 + * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can + * therefore travel no further than those stations that can be reached via a + * single individual LAN from the originating station. + * + * The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an + * address that no conformant S-VLAN component, C-VLAN component, or MAC + * Bridge can forward; however, this address is relayed by a TPMR component. + * PDUs using this destination address, or any of the other addresses that + * appear in both Table 8-1 and Table 8-2 but not in Table 8-3 + * (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by + * any TPMRs but will propagate no further than the nearest S-VLAN component, + * C-VLAN component, or MAC Bridge. + * + * The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address + * that no conformant C-VLAN component, MAC Bridge can forward; however, it is + * relayed by TPMR components and S-VLAN components. PDUs using this + * destination address, or any of the other addresses that appear in Table 8-1 + * but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]), + * will be relayed by TPMR components and S-VLAN components but will propagate + * no further than the nearest C-VLAN component or MAC Bridge. + * + * Because the LLC Entity associated with each Bridge Port is provided via CPU + * port, we must not filter these frames but forward them to CPU port. 
+ * + * In a Bridge, the transmission Port is majorly decided by ingress and egress + * rules, FDB, and spanning tree Port State functions of the Forwarding Process. + * For link-local frames, only CPU port should be designated as destination port + * in the FDB, and the other functions of the Forwarding Process must not + * interfere with the decision of the transmission Port. We call this process + * trapping frames to CPU port. + * + * Therefore, on the switch with CPU port architecture, link-local frames must + * be trapped to CPU port, and certain link-local frames received by a Port of a + * Bridge comprising a TPMR component or an S-VLAN component must be excluded + * from it. + * + * A Bridge of the switch with CPU port architecture cannot comprise a Two-Port + * MAC Relay (TPMR) component as a TPMR component supports only a subset of the + * functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port + * doesn't count) of this architecture will either function as a standard MAC + * Bridge or a standard VLAN Bridge. + * + * Therefore, a Bridge of this architecture can only comprise S-VLAN components, + * C-VLAN components, or MAC Bridge components. Since there's no TPMR component, + * we don't need to relay PDUs using the destination addresses specified on the + * Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge + * section where they must be relayed by TPMR components. + * + * One option to trap link-local frames to CPU port is to add static FDB entries + * with CPU port designated as destination port. However, because that + * Independent VLAN Learning (IVL) is being used on every VID, each entry only + * applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC + * Bridge component or a C-VLAN component, there would have to be 16 times 4096 + * entries. This switch intellectual property can only hold a maximum of 2048 + * entries. 
Using this option, there also isn't a mechanism to prevent + * link-local frames from being discarded when the spanning tree Port State of + * the reception Port is discarding. + * + * The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4 + * registers. Whilst this applies to every VID, it doesn't contain all of the + * reserved MAC addresses without affecting the remaining Standard Group MAC + * Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the + * remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination + * addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF + * destination addresses which may be relayed by MAC Bridges or VLAN Bridges. + * The latter option provides better but not complete conformance. + * + * This switch intellectual property also does not provide a mechanism to trap + * link-local frames with specific destination addresses to CPU port by Bridge, + * to conform to the filtering rules for the distinct Bridge components. 
+ * + * Therefore, regardless of the type of the Bridge component, link-local frames + * with these destination addresses will be trapped to CPU port: + * + * 01-80-C2-00-00-[00,01,02,03,0E] + * + * In a Bridge comprising a MAC Bridge component or a C-VLAN component: + * + * Link-local frames with these destination addresses won't be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] + * + * In a Bridge comprising an S-VLAN component: + * + * Link-local frames with these destination addresses will be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-00 + * + * Link-local frames with these destination addresses won't be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-[04,05,06,07,08,09,0A] + * + * To trap link-local frames to CPU port as conformant as this switch + * intellectual property can allow, link-local frames are made to be regarded as + * Bridge Protocol Data Units (BPDUs). This is because this switch intellectual + * property only lets the frames regarded as BPDUs bypass the spanning tree Port + * State function of the Forwarding Process. + * + * The only remaining interference is the ingress rules. When the reception Port + * has no PVID assigned on software, VLAN-untagged frames won't be allowed in. + * There doesn't seem to be a mechanism on the switch intellectual property to + * have link-local frames bypass this function of the Forwarding Process. */ static void mt753x_trap_frames(struct mt7530_priv *priv) @@ -1015,35 +1168,43 @@ mt753x_trap_frames(struct mt7530_priv *priv) /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them * VLAN-untagged. 
*/ - mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK | - MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK | - MT753X_BPDU_PORT_FW_MASK, - MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_BPC, + MT753X_PAE_BPDU_FR | MT753X_PAE_EG_TAG_MASK | + MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK | + MT753X_BPDU_PORT_FW_MASK, + MT753X_PAE_BPDU_FR | + MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress * them VLAN-untagged. */ - mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK | - MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK | - MT753X_R01_PORT_FW_MASK, - MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_RGAC1, + MT753X_R02_BPDU_FR | MT753X_R02_EG_TAG_MASK | + MT753X_R02_PORT_FW_MASK | MT753X_R01_BPDU_FR | + MT753X_R01_EG_TAG_MASK | MT753X_R01_PORT_FW_MASK, + MT753X_R02_BPDU_FR | + MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R01_BPDU_FR | + MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress * them VLAN-untagged. 
*/ - mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK | - MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK | - MT753X_R03_PORT_FW_MASK, - MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_RGAC2, + MT753X_R0E_BPDU_FR | MT753X_R0E_EG_TAG_MASK | + MT753X_R0E_PORT_FW_MASK | MT753X_R03_BPDU_FR | + MT753X_R03_EG_TAG_MASK | MT753X_R03_PORT_FW_MASK, + MT753X_R0E_BPDU_FR | + MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R03_BPDU_FR | + MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); } static int diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 03598f9ae288..299a26ad5809 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -64,6 +64,7 @@ enum mt753x_id { /* Registers for BPDU and PAE frame control*/ #define MT753X_BPC 0x24 +#define MT753X_PAE_BPDU_FR BIT(25) #define MT753X_PAE_EG_TAG_MASK GENMASK(24, 22) #define MT753X_PAE_EG_TAG(x) FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x) #define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16) @@ -74,20 +75,24 @@ enum mt753x_id { /* Register for :01 and :02 MAC DA frame control */ #define MT753X_RGAC1 0x28 +#define MT753X_R02_BPDU_FR BIT(25) #define MT753X_R02_EG_TAG_MASK GENMASK(24, 22) #define MT753X_R02_EG_TAG(x) FIELD_PREP(MT753X_R02_EG_TAG_MASK, x) #define MT753X_R02_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R02_PORT_FW(x) FIELD_PREP(MT753X_R02_PORT_FW_MASK, x) +#define MT753X_R01_BPDU_FR BIT(9) #define MT753X_R01_EG_TAG_MASK GENMASK(8, 6) #define MT753X_R01_EG_TAG(x) FIELD_PREP(MT753X_R01_EG_TAG_MASK, x) #define MT753X_R01_PORT_FW_MASK GENMASK(2, 0) /* Register for :03 and :0E MAC DA frame control */ #define MT753X_RGAC2 0x2c +#define MT753X_R0E_BPDU_FR BIT(25) #define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22) #define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x) #define 
MT753X_R0E_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x) +#define MT753X_R03_BPDU_FR BIT(9) #define MT753X_R03_EG_TAG_MASK GENMASK(8, 6) #define MT753X_R03_EG_TAG(x) FIELD_PREP(MT753X_R03_EG_TAG_MASK, x) #define MT753X_R03_PORT_FW_MASK GENMASK(2, 0) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 7979b1019242..e37c82eb6232 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -362,7 +362,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, ENA_COM_BOUNCE_BUFFER_CNTRL_CNT; io_sq->bounce_buf_ctrl.next_to_use = 0; - size = io_sq->bounce_buf_ctrl.buffer_size * + size = (size_t)io_sq->bounce_buf_ctrl.buffer_size * io_sq->bounce_buf_ctrl.buffers_num; dev_node = dev_to_node(ena_dev->dmadev); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 43c099141e21..3ea449be7bdc 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1205,8 +1205,11 @@ static void ena_unmap_tx_buff(struct ena_ring *tx_ring, static void ena_free_tx_bufs(struct ena_ring *tx_ring) { bool print_once = true; + bool is_xdp_ring; u32 i; + is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid); + for (i = 0; i < tx_ring->ring_size; i++) { struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; @@ -1226,10 +1229,15 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring) ena_unmap_tx_buff(tx_ring, tx_info); - dev_kfree_skb_any(tx_info->skb); + if (is_xdp_ring) + xdp_return_frame(tx_info->xdpf); + else + dev_kfree_skb_any(tx_info->skb); } - netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->qid)); + + if (!is_xdp_ring) + netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->qid)); } static void ena_free_all_tx_bufs(struct ena_adapter *adapter) @@ -3815,10 +3823,11 @@ static 
void check_for_missing_completions(struct ena_adapter *adapter) { struct ena_ring *tx_ring; struct ena_ring *rx_ring; - int i, budget, rc; + int qid, budget, rc; int io_queue_count; io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues; + /* Make sure the driver doesn't turn the device in other process */ smp_rmb(); @@ -3831,27 +3840,29 @@ static void check_for_missing_completions(struct ena_adapter *adapter) if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) return; - budget = ENA_MONITORED_TX_QUEUES; + budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES); - for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) { - tx_ring = &adapter->tx_ring[i]; - rx_ring = &adapter->rx_ring[i]; + qid = adapter->last_monitored_tx_qid; + + while (budget) { + qid = (qid + 1) % io_queue_count; + + tx_ring = &adapter->tx_ring[qid]; + rx_ring = &adapter->rx_ring[qid]; rc = check_missing_comp_in_tx_queue(adapter, tx_ring); if (unlikely(rc)) return; - rc = !ENA_IS_XDP_INDEX(adapter, i) ? + rc = !ENA_IS_XDP_INDEX(adapter, qid) ? 
check_for_rx_interrupt_queue(adapter, rx_ring) : 0; if (unlikely(rc)) return; budget--; - if (!budget) - break; } - adapter->last_monitored_tx_qid = i % io_queue_count; + adapter->last_monitored_tx_qid = qid; } /* trigger napi schedule after 2 consecutive detections */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index bda93e550b08..68fe4890cfb2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -12,8 +12,10 @@ #include "rvu_reg.h" #include "rvu.h" #include "npc.h" +#include "mcs.h" #include "cgx.h" #include "lmac_common.h" +#include "rvu_npc_hash.h" static void nix_free_tx_vtag_entries(struct rvu *rvu, u16 pcifunc); static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, @@ -70,12 +72,19 @@ enum nix_makr_fmt_indexes { /* For now considering MC resources needed for broadcast * pkt replication only. i.e 256 HWVFs + 12 PFs. */ -#define MC_TBL_SIZE MC_TBL_SZ_512 -#define MC_BUF_CNT MC_BUF_CNT_128 +#define MC_TBL_SIZE MC_TBL_SZ_2K +#define MC_BUF_CNT MC_BUF_CNT_1024 + +#define MC_TX_MAX 2048 struct mce { struct hlist_node node; + u32 rq_rss_index; u16 pcifunc; + u16 channel; + u8 dest_type; + u8 is_active; + u8 reserved[2]; }; int rvu_get_next_nix_blkaddr(struct rvu *rvu, int blkaddr) @@ -163,18 +172,33 @@ static void nix_mce_list_init(struct nix_mce_list *list, int max) list->max = max; } -static u16 nix_alloc_mce_list(struct nix_mcast *mcast, int count) +static int nix_alloc_mce_list(struct nix_mcast *mcast, int count, u8 dir) { + struct rsrc_bmap *mce_counter; int idx; if (!mcast) - return 0; + return -EINVAL; + + mce_counter = &mcast->mce_counter[dir]; + if (!rvu_rsrc_check_contig(mce_counter, count)) + return -ENOSPC; - idx = mcast->next_free_mce; - mcast->next_free_mce += count; + idx = rvu_alloc_rsrc_contig(mce_counter, count); return idx; } +static void nix_free_mce_list(struct nix_mcast *mcast, 
int count, int start, u8 dir) +{ + struct rsrc_bmap *mce_counter; + + if (!mcast) + return; + + mce_counter = &mcast->mce_counter[dir]; + rvu_free_rsrc_contig(mce_counter, count, start); +} + struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr) { int nix_blkaddr = 0, i = 0; @@ -190,6 +214,18 @@ struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr) return NULL; } +int nix_get_dwrr_mtu_reg(struct rvu_hwinfo *hw, int smq_link_type) +{ + if (hw->cap.nix_multiple_dwrr_mtu) + return NIX_AF_DWRR_MTUX(smq_link_type); + + if (smq_link_type == SMQ_LINK_TYPE_SDP) + return NIX_AF_DWRR_SDP_MTU; + + /* Here it's same reg for RPM and LBK */ + return NIX_AF_DWRR_RPM_MTU; +} + u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu) { dwrr_mtu &= 0x1FULL; @@ -322,8 +358,11 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf, pfvf->tx_chan_cnt = 1; rsp->tx_link = cgx_id * hw->lmac_per_cgx + lmac_id; - cgx_set_pkind(rvu_cgx_pdata(cgx_id, rvu), lmac_id, pkind); - rvu_npc_set_pkind(rvu, pkind, pfvf); + if (rvu_cgx_is_pkind_config_permitted(rvu, pcifunc)) { + cgx_set_pkind(rvu_cgx_pdata(cgx_id, rvu), lmac_id, + pkind); + rvu_npc_set_pkind(rvu, pkind, pfvf); + } break; case NIX_INTF_TYPE_LBK: @@ -463,14 +502,190 @@ static void nix_interface_deinit(struct rvu *rvu, u16 pcifunc, u8 nixlf) rvu_cgx_disable_dmac_entries(rvu, pcifunc); } -int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu, - struct nix_bp_cfg_req *req, +#define NIX_BPIDS_PER_LMAC 8 +#define NIX_BPIDS_PER_CPT 1 +static int nix_setup_bpids(struct rvu *rvu, struct nix_hw *hw, int blkaddr) +{ + struct nix_bp *bp = &hw->bp; + int err, max_bpids; + u64 cfg; + + cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1); + max_bpids = (cfg >> 12) & 0xFFF; + + /* Reserve the BPIds for CGX and SDP */ + bp->cgx_bpid_cnt = rvu->hw->cgx_links * NIX_BPIDS_PER_LMAC; + bp->sdp_bpid_cnt = rvu->hw->sdp_links * (cfg & 0xFFF); + bp->free_pool_base = bp->cgx_bpid_cnt + bp->sdp_bpid_cnt + + NIX_BPIDS_PER_CPT; + bp->bpids.max 
= max_bpids - bp->free_pool_base; + + err = rvu_alloc_bitmap(&bp->bpids); + if (err) + return err; + + bp->fn_map = devm_kcalloc(rvu->dev, bp->bpids.max, + sizeof(u16), GFP_KERNEL); + if (!bp->fn_map) + return -ENOMEM; + + bp->intf_map = devm_kcalloc(rvu->dev, bp->bpids.max, + sizeof(u8), GFP_KERNEL); + if (!bp->intf_map) + return -ENOMEM; + + bp->ref_cnt = devm_kcalloc(rvu->dev, bp->bpids.max, + sizeof(u8), GFP_KERNEL); + if (!bp->ref_cnt) + return -ENOMEM; + + return 0; +} + +void rvu_nix_flr_free_bpids(struct rvu *rvu, u16 pcifunc) +{ + int blkaddr, bpid, err; + struct nix_hw *nix_hw; + struct nix_bp *bp; + + if (!is_afvf(pcifunc)) + return; + + err = nix_get_struct_ptrs(rvu, pcifunc, &nix_hw, &blkaddr); + if (err) + return; + + bp = &nix_hw->bp; + + mutex_lock(&rvu->rsrc_lock); + for (bpid = 0; bpid < bp->bpids.max; bpid++) { + if (bp->fn_map[bpid] == pcifunc) { + bp->ref_cnt[bpid]--; + if (bp->ref_cnt[bpid]) + continue; + rvu_free_rsrc(&bp->bpids, bpid); + bp->fn_map[bpid] = 0; + } + } + mutex_unlock(&rvu->rsrc_lock); +} + +int rvu_mbox_handler_nix_rx_chan_cfg(struct rvu *rvu, + struct nix_rx_chan_cfg *req, + struct nix_rx_chan_cfg *rsp) +{ + struct rvu_pfvf *pfvf; + int blkaddr; + u16 chan; + + pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, req->hdr.pcifunc); + chan = pfvf->rx_chan_base + req->chan; + + if (req->type == NIX_INTF_TYPE_CPT) + chan = chan | BIT(11); + + if (req->read) { + rsp->val = rvu_read64(rvu, blkaddr, + NIX_AF_RX_CHANX_CFG(chan)); + rsp->chan = req->chan; + } else { + rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan), req->val); + } + return 0; +} + +int rvu_mbox_handler_nix_alloc_bpids(struct rvu *rvu, + struct nix_alloc_bpid_req *req, + struct nix_bpids *rsp) +{ + u16 pcifunc = req->hdr.pcifunc; + struct nix_hw *nix_hw; + int blkaddr, cnt = 0; + struct nix_bp *bp; + int bpid, err; + + err = nix_get_struct_ptrs(rvu, pcifunc, &nix_hw, &blkaddr); + if (err) + return err; + + bp = &nix_hw->bp; + + 
/* For interface like sso uses same bpid across multiple + * application. Find the bpid is it already allocate or + * allocate a new one. + */ + mutex_lock(&rvu->rsrc_lock); + if (req->type > NIX_INTF_TYPE_CPT || req->type == NIX_INTF_TYPE_LBK) { + for (bpid = 0; bpid < bp->bpids.max; bpid++) { + if (bp->intf_map[bpid] == req->type) { + rsp->bpids[cnt] = bpid + bp->free_pool_base; + rsp->bpid_cnt++; + bp->ref_cnt[bpid]++; + cnt++; + } + } + if (rsp->bpid_cnt) + goto exit; + } + + for (cnt = 0; cnt < req->bpid_cnt; cnt++) { + bpid = rvu_alloc_rsrc(&bp->bpids); + if (bpid < 0) + goto exit; + rsp->bpids[cnt] = bpid + bp->free_pool_base; + bp->intf_map[bpid] = req->type; + bp->fn_map[bpid] = pcifunc; + bp->ref_cnt[bpid]++; + rsp->bpid_cnt++; + } +exit: + mutex_unlock(&rvu->rsrc_lock); + return 0; +} + +int rvu_mbox_handler_nix_free_bpids(struct rvu *rvu, + struct nix_bpids *req, struct msg_rsp *rsp) { u16 pcifunc = req->hdr.pcifunc; + int blkaddr, cnt, err, id; + struct nix_hw *nix_hw; + struct nix_bp *bp; + u16 bpid; + + err = nix_get_struct_ptrs(rvu, pcifunc, &nix_hw, &blkaddr); + if (err) + return err; + + bp = &nix_hw->bp; + mutex_lock(&rvu->rsrc_lock); + for (cnt = 0; cnt < req->bpid_cnt; cnt++) { + bpid = req->bpids[cnt] - bp->free_pool_base; + bp->ref_cnt[bpid]--; + if (bp->ref_cnt[bpid]) + continue; + rvu_free_rsrc(&bp->bpids, bpid); + for (id = 0; id < bp->bpids.max; id++) { + if (bp->fn_map[id] == pcifunc) + bp->fn_map[id] = 0; + } + } + mutex_unlock(&rvu->rsrc_lock); + return 0; +} + +static int nix_bp_disable(struct rvu *rvu, + struct nix_bp_cfg_req *req, + struct msg_rsp *rsp, bool cpt_link) +{ + u16 pcifunc = req->hdr.pcifunc; + int blkaddr, pf, type, err; struct rvu_pfvf *pfvf; - int blkaddr, pf, type; + struct nix_hw *nix_hw; u16 chan_base, chan; + struct nix_bp *bp; + u16 chan_v, bpid; u64 cfg; pf = rvu_get_pf(pcifunc); @@ -478,41 +693,89 @@ int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu, if (!is_pf_cgxmapped(rvu, pf) && type != NIX_INTF_TYPE_LBK) 
return 0; + if (is_sdp_pfvf(pcifunc)) + type = NIX_INTF_TYPE_SDP; + + if (cpt_link && !rvu->hw->cpt_links) + return 0; + pfvf = rvu_get_pfvf(rvu, pcifunc); - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + err = nix_get_struct_ptrs(rvu, pcifunc, &nix_hw, &blkaddr); + if (err) + return err; + bp = &nix_hw->bp; chan_base = pfvf->rx_chan_base + req->chan_base; + + if (cpt_link) { + type = NIX_INTF_TYPE_CPT; + cfg = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(0)); + /* MODE=0 or MODE=1 => CPT looks only channels starting from cpt chan base */ + cfg = (cfg >> 20) & 0x3; + if (cfg != 2) + chan_base = rvu->hw->cpt_chan_base; + } + for (chan = chan_base; chan < (chan_base + req->chan_cnt); chan++) { - cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan)); - rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan), + /* CPT channel for a given link channel is always + * assumed to be BIT(11) set in link channel. + */ + if (cpt_link) + chan_v = chan | BIT(11); + else + chan_v = chan; + + cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan_v)); + rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan_v), cfg & ~BIT_ULL(16)); + + if (type == NIX_INTF_TYPE_LBK) { + bpid = cfg & GENMASK(8, 0); + mutex_lock(&rvu->rsrc_lock); + rvu_free_rsrc(&bp->bpids, bpid - bp->free_pool_base); + for (bpid = 0; bpid < bp->bpids.max; bpid++) { + if (bp->fn_map[bpid] == pcifunc) { + bp->fn_map[bpid] = 0; + bp->ref_cnt[bpid] = 0; + } + } + mutex_unlock(&rvu->rsrc_lock); + } } return 0; } +int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu, + struct nix_bp_cfg_req *req, + struct msg_rsp *rsp) +{ + return nix_bp_disable(rvu, req, rsp, false); +} + +int rvu_mbox_handler_nix_cpt_bp_disable(struct rvu *rvu, + struct nix_bp_cfg_req *req, + struct msg_rsp *rsp) +{ + return nix_bp_disable(rvu, req, rsp, true); +} + static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, int type, int chan_id) { - int bpid, blkaddr, lmac_chan_cnt, sdp_chan_cnt; - u16 cgx_bpid_cnt, lbk_bpid_cnt, 
sdp_bpid_cnt; + int bpid, blkaddr, sdp_chan_base, err; struct rvu_hwinfo *hw = rvu->hw; struct rvu_pfvf *pfvf; + struct nix_hw *nix_hw; u8 cgx_id, lmac_id; - u64 cfg; - - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, req->hdr.pcifunc); - cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST); - lmac_chan_cnt = cfg & 0xFF; - - cgx_bpid_cnt = hw->cgx_links * lmac_chan_cnt; - lbk_bpid_cnt = hw->lbk_links * ((cfg >> 16) & 0xFF); - - cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1); - sdp_chan_cnt = cfg & 0xFFF; - sdp_bpid_cnt = hw->sdp_links * sdp_chan_cnt; + struct nix_bp *bp; pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + err = nix_get_struct_ptrs(rvu, req->hdr.pcifunc, &nix_hw, &blkaddr); + if (err) + return err; + + bp = &nix_hw->bp; /* Backpressure IDs range division * CGX channles are mapped to (0 - 191) BPIDs * LBK channles are mapped to (192 - 255) BPIDs @@ -525,38 +788,52 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, */ switch (type) { case NIX_INTF_TYPE_CGX: - if ((req->chan_base + req->chan_cnt) > 15) - return -EINVAL; + if ((req->chan_base + req->chan_cnt) > NIX_BPIDS_PER_LMAC) + return NIX_AF_ERR_INVALID_BPID_REQ; + rvu_get_cgx_lmac_id(pfvf->cgx_lmac, &cgx_id, &lmac_id); /* Assign bpid based on cgx, lmac and chan id */ - bpid = (cgx_id * hw->lmac_per_cgx * lmac_chan_cnt) + - (lmac_id * lmac_chan_cnt) + req->chan_base; + bpid = (cgx_id * hw->lmac_per_cgx * NIX_BPIDS_PER_LMAC) + + (lmac_id * NIX_BPIDS_PER_LMAC) + req->chan_base; if (req->bpid_per_chan) bpid += chan_id; - if (bpid > cgx_bpid_cnt) - return -EINVAL; + if (bpid > bp->cgx_bpid_cnt) + return NIX_AF_ERR_INVALID_BPID; + break; + case NIX_INTF_TYPE_CPT: + bpid = bp->cgx_bpid_cnt + bp->sdp_bpid_cnt; break; - case NIX_INTF_TYPE_LBK: - if ((req->chan_base + req->chan_cnt) > 63) - return -EINVAL; - bpid = cgx_bpid_cnt + req->chan_base; - if (req->bpid_per_chan) - bpid += chan_id; - if (bpid > (cgx_bpid_cnt + lbk_bpid_cnt)) - return -EINVAL; + /* Alloc bpid from the free pool */ + 
mutex_lock(&rvu->rsrc_lock); + bpid = rvu_alloc_rsrc(&bp->bpids); + if (bpid < 0) { + mutex_unlock(&rvu->rsrc_lock); + return NIX_AF_ERR_INVALID_BPID; + } + bp->fn_map[bpid] = req->hdr.pcifunc; + bp->ref_cnt[bpid]++; + bpid += bp->free_pool_base; + mutex_unlock(&rvu->rsrc_lock); break; case NIX_INTF_TYPE_SDP: - if ((req->chan_base + req->chan_cnt) > 255) - return -EINVAL; + if ((req->chan_base + req->chan_cnt) > bp->sdp_bpid_cnt) + return NIX_AF_ERR_INVALID_BPID_REQ; + + /* Handle usecase of 2 SDP blocks */ + if (!hw->cap.programmable_chans) + sdp_chan_base = pfvf->rx_chan_base - NIX_CHAN_SDP_CH_START; + else + sdp_chan_base = pfvf->rx_chan_base - hw->sdp_chan_base; + + bpid = bp->cgx_bpid_cnt + req->chan_base + sdp_chan_base; - bpid = sdp_bpid_cnt + req->chan_base; if (req->bpid_per_chan) bpid += chan_id; - if (bpid > (cgx_bpid_cnt + lbk_bpid_cnt + sdp_bpid_cnt)) - return -EINVAL; + if (bpid > (bp->cgx_bpid_cnt + bp->sdp_bpid_cnt)) + return NIX_AF_ERR_INVALID_BPID; break; default: return -EINVAL; @@ -564,15 +841,17 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, return bpid; } -int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu, - struct nix_bp_cfg_req *req, - struct nix_bp_cfg_rsp *rsp) +static int nix_bp_enable(struct rvu *rvu, + struct nix_bp_cfg_req *req, + struct nix_bp_cfg_rsp *rsp, + bool cpt_link) { int blkaddr, pf, type, chan_id = 0; u16 pcifunc = req->hdr.pcifunc; + s16 bpid, bpid_base = -1; struct rvu_pfvf *pfvf; u16 chan_base, chan; - s16 bpid, bpid_base; + u16 chan_v; u64 cfg; pf = rvu_get_pf(pcifunc); @@ -585,25 +864,46 @@ int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu, type != NIX_INTF_TYPE_SDP) return 0; + if (cpt_link && !rvu->hw->cpt_links) + return 0; + pfvf = rvu_get_pfvf(rvu, pcifunc); blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); - bpid_base = rvu_nix_get_bpid(rvu, req, type, chan_id); chan_base = pfvf->rx_chan_base + req->chan_base; - bpid = bpid_base; + + if (cpt_link) { + type = NIX_INTF_TYPE_CPT; + 
cfg = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(0)); + /* MODE=0 or MODE=1 => CPT looks only channels starting from cpt chan base */ + cfg = (cfg >> 20) & 0x3; + if (cfg != 2) + chan_base = rvu->hw->cpt_chan_base; + } for (chan = chan_base; chan < (chan_base + req->chan_cnt); chan++) { + bpid = rvu_nix_get_bpid(rvu, req, type, chan_id); if (bpid < 0) { dev_warn(rvu->dev, "Fail to enable backpressure\n"); return -EINVAL; } + if (bpid_base < 0) + bpid_base = bpid; + + /* CPT channel for a given link channel is always + * assumed to be BIT(11) set in link channel. + */ + + if (cpt_link) + chan_v = chan | BIT(11); + else + chan_v = chan; - cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan)); + cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan_v)); cfg &= ~GENMASK_ULL(8, 0); - rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan), + rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan_v), cfg | (bpid & GENMASK_ULL(8, 0)) | BIT_ULL(16)); chan_id++; - bpid = rvu_nix_get_bpid(rvu, req, type, chan_id); } for (chan = 0; chan < req->chan_cnt; chan++) { @@ -618,6 +918,20 @@ int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu, return 0; } +int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu, + struct nix_bp_cfg_req *req, + struct nix_bp_cfg_rsp *rsp) +{ + return nix_bp_enable(rvu, req, rsp, false); +} + +int rvu_mbox_handler_nix_cpt_bp_enable(struct rvu *rvu, + struct nix_bp_cfg_req *req, + struct nix_bp_cfg_rsp *rsp) +{ + return nix_bp_enable(rvu, req, rsp, true); +} + static void nix_setup_lso_tso_l3(struct rvu *rvu, int blkaddr, u64 format, bool v4, u64 *fidx) { @@ -782,17 +1096,51 @@ static int nixlf_rss_ctx_init(struct rvu *rvu, int blkaddr, return 0; } +static void nix_aq_reset(struct rvu *rvu, struct rvu_block *block) +{ + struct admin_queue *aq = block->aq; + u64 reg, head, tail; + int timeout = 2000; + + /* check if any AQ err is set and reset the AQ */ + reg = rvu_read64(rvu, block->addr, NIX_AF_AQ_STATUS); + head = (reg >> 4) & AQ_PTR_MASK; + tail = (reg >> 
36) & AQ_PTR_MASK; + dev_err(rvu->dev, "AQ error occurred head:0x%llx tail:%llx status:%llx\n", head, tail, reg); + + /* Check if busy bit is set */ + while (reg & BIT_ULL(62)) { + udelay(1); + timeout--; + if (!timeout) + dev_err(rvu->dev, "timeout waiting for busy bit to clear\n"); + } + /*reset the AQ base and result */ + memset(aq->inst->base, 0, sizeof(struct nix_aq_inst_s) * Q_COUNT(AQ_SIZE)); + memset(aq->res->base, 0, sizeof(struct nix_aq_res_s) * Q_COUNT(AQ_SIZE)); + /* Make sure the AQ memry is reset */ + wmb(); + reg = rvu_read64(rvu, block->addr, NIX_AF_AQ_STATUS); + reg |= BIT_ULL(63); + rvu_write64(rvu, block->addr, NIX_AF_AQ_STATUS, reg); + dev_info(rvu->dev, "AQ status after reset:0x%llx\n", + rvu_read64(rvu, block->addr, NIX_AF_AQ_STATUS)); +} + static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, struct nix_aq_inst_s *inst) { struct admin_queue *aq = block->aq; struct nix_aq_res_s *result; - int timeout = 1000; - u64 reg, head; + u64 reg, head, intr; + int timeout = 2000; int ret; - result = (struct nix_aq_res_s *)aq->res->base; + reg = rvu_read64(rvu, block->addr, NIX_AF_AQ_STATUS); + if (reg & BIT_ULL(63)) + nix_aq_reset(rvu, block); + result = (struct nix_aq_res_s *)aq->res->base; /* Get current head pointer where to append this instruction */ reg = rvu_read64(rvu, block->addr, NIX_AF_AQ_STATUS); head = (reg >> 4) & AQ_PTR_MASK; @@ -806,18 +1154,28 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, /* Ring the doorbell and wait for result */ rvu_write64(rvu, block->addr, NIX_AF_AQ_DOOR, 1); while (result->compcode == NIX_AQ_COMP_NOTDONE) { + intr = rvu_read64(rvu, block->addr, NIX_AF_ERR_INT); cpu_relax(); udelay(1); timeout--; - if (!timeout) + if (!timeout) { + dev_err_ratelimited(rvu->dev, + "%s wait timeout intr=0x%llx status=0x%llx compcode:%d\n", + __func__, intr, + rvu_read64(rvu, block->addr, NIX_AF_AQ_STATUS), + result->compcode); return -EBUSY; + } } if (result->compcode != 
NIX_AQ_COMP_GOOD) { /* TODO: Replace this with some error code */ + dev_err(rvu->dev, "AQ failed with error:%d\n", result->compcode); if (result->compcode == NIX_AQ_COMP_CTX_FAULT || result->compcode == NIX_AQ_COMP_LOCKERR || result->compcode == NIX_AQ_COMP_CTX_POISON) { + dev_err(rvu->dev, "AQ failed due to cache line error:%d\n", + result->compcode); ret = rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX0_RX); ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX0_TX); ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX1_RX); @@ -1210,7 +1568,9 @@ static int nix_lf_hwctx_disable(struct rvu *rvu, struct hwctx_disable_req *req) aq_req.cq.ena = 0; aq_req.cq_mask.ena = 1; aq_req.cq.bp_ena = 0; + aq_req.cq.lbp_ena = 0; aq_req.cq_mask.bp_ena = 1; + aq_req.cq_mask.lbp_ena = 1; q_cnt = pfvf->cq_ctx->qsize; bmap = pfvf->cq_bmap; } @@ -1292,6 +1652,8 @@ int rvu_mbox_handler_nix_aq_enq(struct rvu *rvu, return rvu_nix_aq_enq_inst(rvu, req, rsp); } #endif +EXPORT_SYMBOL(rvu_mbox_handler_nix_aq_enq); + /* CN10K mbox handler */ int rvu_mbox_handler_nix_cn10k_aq_enq(struct rvu *rvu, struct nix_cn10k_aq_enq_req *req, @@ -1300,6 +1662,7 @@ int rvu_mbox_handler_nix_cn10k_aq_enq(struct rvu *rvu, return rvu_nix_aq_enq_inst(rvu, (struct nix_aq_enq_req *)req, (struct nix_aq_enq_rsp *)rsp); } +EXPORT_SYMBOL(rvu_mbox_handler_nix_cn10k_aq_enq); int rvu_mbox_handler_nix_hwctx_disable(struct rvu *rvu, struct hwctx_disable_req *req, @@ -1313,10 +1676,10 @@ int rvu_mbox_handler_nix_lf_alloc(struct rvu *rvu, struct nix_lf_alloc_rsp *rsp) { int nixlf, qints, hwctx_size, intf, err, rc = 0; + struct rvu_pfvf *pfvf, *parent_pf; struct rvu_hwinfo *hw = rvu->hw; u16 pcifunc = req->hdr.pcifunc; struct rvu_block *block; - struct rvu_pfvf *pfvf; u64 cfg, ctx_cfg; int blkaddr; @@ -1326,6 +1689,7 @@ int rvu_mbox_handler_nix_lf_alloc(struct rvu *rvu, if (req->way_mask) req->way_mask &= 0xFFFF; + parent_pf = &rvu->pf[rvu_get_pf(pcifunc)]; pfvf = rvu_get_pfvf(rvu, pcifunc); blkaddr = 
rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); if (!pfvf->nixlf || blkaddr < 0) @@ -1484,8 +1848,10 @@ int rvu_mbox_handler_nix_lf_alloc(struct rvu *rvu, rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_CFG(nixlf), req->rx_cfg); /* Configure pkind for TX parse config */ - cfg = NPC_TX_DEF_PKIND; - rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_PARSE_CFG(nixlf), cfg); + if (rvu_cgx_is_pkind_config_permitted(rvu, pcifunc)) { + cfg = NPC_TX_DEF_PKIND; + rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_PARSE_CFG(nixlf), cfg); + } intf = is_afvf(pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX; if (is_sdp_pfvf(pcifunc)) @@ -1503,6 +1869,10 @@ int rvu_mbox_handler_nix_lf_alloc(struct rvu *rvu, rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_VTAG_TYPEX(nixlf, NIX_AF_LFX_RX_VTAG_TYPE7), VTAGSIZE_T4 | VTAG_STRIP); + /* Configure RX VTAG Type 6 (strip) for fdsa */ + rvu_write64(rvu, blkaddr, + NIX_AF_LFX_RX_VTAG_TYPEX(nixlf, NIX_AF_LFX_RX_VTAG_TYPE6), + VTAGSIZE_T4 | VTAG_STRIP | VTAG_CAPTURE); goto exit; @@ -1531,6 +1901,7 @@ exit: cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2); rsp->qints = ((cfg >> 12) & 0xFFF); rsp->cints = ((cfg >> 24) & 0xFFF); + rsp->hw_rx_tstamp_en = parent_pf->hw_rx_tstamp_en; rsp->cgx_links = hw->cgx_links; rsp->lbk_links = hw->lbk_links; rsp->sdp_links = hw->sdp_links; @@ -1562,6 +1933,9 @@ int rvu_mbox_handler_nix_lf_free(struct rvu *rvu, struct nix_lf_free_req *req, else rvu_npc_free_mcam_entries(rvu, pcifunc, nixlf); + /* Reset SPI to SA index table */ + rvu_nix_free_spi_to_sa_table(rvu, pcifunc); + /* Free any tx vtag def entries used by this NIX LF */ if (!(req->flags & NIX_LF_DONT_FREE_TX_VTAG)) nix_free_tx_vtag_entries(rvu, pcifunc); @@ -1707,6 +2081,42 @@ exit: return true; } +static void nix_reset_tx_schedule(struct rvu *rvu, int blkaddr, + int lvl, int schq) +{ + u64 tlx_parent = 0, tlx_schedule = 0; + + switch (lvl) { + case NIX_TXSCH_LVL_TL2: + tlx_parent = NIX_AF_TL2X_PARENT(schq); + tlx_schedule = NIX_AF_TL2X_SCHEDULE(schq); + break; + case NIX_TXSCH_LVL_TL3: + 
tlx_parent = NIX_AF_TL3X_PARENT(schq); + tlx_schedule = NIX_AF_TL3X_SCHEDULE(schq); + break; + case NIX_TXSCH_LVL_TL4: + tlx_parent = NIX_AF_TL4X_PARENT(schq); + tlx_schedule = NIX_AF_TL4X_SCHEDULE(schq); + break; + case NIX_TXSCH_LVL_MDQ: + /* no need to reset SMQ_CFG as HW clears this CSR + * on SMQ flush + */ + tlx_parent = NIX_AF_MDQX_PARENT(schq); + tlx_schedule = NIX_AF_MDQX_SCHEDULE(schq); + break; + default: + return; + } + + if (tlx_parent) + rvu_write64(rvu, blkaddr, tlx_parent, 0x0); + + if (tlx_schedule) + rvu_write64(rvu, blkaddr, tlx_schedule, 0x0); +} + /* Disable shaping of pkts by a scheduler queue * at a given scheduler level. */ @@ -1996,6 +2406,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu, { struct rvu_hwinfo *hw = rvu->hw; u16 pcifunc = req->hdr.pcifunc; + struct rvu_pfvf *parent_pf; int link, blkaddr, rc = 0; int lvl, idx, start, end; struct nix_txsch *txsch; @@ -2012,6 +2423,8 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu, if (!nix_hw) return NIX_AF_ERR_INVALID_NIXBLK; + parent_pf = &rvu->pf[rvu_get_pf(pcifunc)]; + mutex_lock(&rvu->rsrc_lock); /* Check if request is valid as per HW capabilities @@ -2056,6 +2469,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu, pfvf_map[schq] = TXSCH_MAP(pcifunc, 0); nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq); nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq); + nix_reset_tx_schedule(rvu, blkaddr, lvl, schq); } for (idx = 0; idx < req->schq[lvl]; idx++) { @@ -2065,11 +2479,12 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu, pfvf_map[schq] = TXSCH_MAP(pcifunc, 0); nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq); nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq); + nix_reset_tx_schedule(rvu, blkaddr, lvl, schq); } } rsp->aggr_level = hw->cap.nix_tx_aggr_lvl; - rsp->aggr_lvl_rr_prio = TXSCH_TL1_DFLT_RR_PRIO; + rsp->aggr_lvl_rr_prio = parent_pf->tl1_rr_prio; rsp->link_cfg_lvl = rvu_read64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL) & 0x01 ? 
NIX_TXSCH_LVL_TL3 : NIX_TXSCH_LVL_TL2; @@ -2081,13 +2496,156 @@ exit: return rc; } +static void nix_smq_flush_fill_ctx(struct rvu *rvu, int blkaddr, int smq, + struct nix_smq_flush_ctx *smq_flush_ctx) +{ + struct nix_smq_tree_ctx *smq_tree_ctx; + u64 parent_off, regval; + u16 schq; + int lvl; + + smq_flush_ctx->smq = smq; + + schq = smq; + for (lvl = NIX_TXSCH_LVL_SMQ; lvl <= NIX_TXSCH_LVL_TL1; lvl++) { + smq_tree_ctx = &smq_flush_ctx->smq_tree_ctx[lvl]; + if (lvl == NIX_TXSCH_LVL_TL1) { + smq_flush_ctx->tl1_schq = schq; + smq_tree_ctx->cir_off = NIX_AF_TL1X_CIR(schq); + smq_tree_ctx->pir_off = 0; + smq_tree_ctx->pir_val = 0; + parent_off = 0; + } else if (lvl == NIX_TXSCH_LVL_TL2) { + smq_flush_ctx->tl2_schq = schq; + smq_tree_ctx->cir_off = NIX_AF_TL2X_CIR(schq); + smq_tree_ctx->pir_off = NIX_AF_TL2X_PIR(schq); + parent_off = NIX_AF_TL2X_PARENT(schq); + } else if (lvl == NIX_TXSCH_LVL_TL3) { + smq_flush_ctx->tl3_schq = schq; + smq_tree_ctx->cir_off = NIX_AF_TL3X_CIR(schq); + smq_tree_ctx->pir_off = NIX_AF_TL3X_PIR(schq); + parent_off = NIX_AF_TL3X_PARENT(schq); + } else if (lvl == NIX_TXSCH_LVL_TL4) { + smq_flush_ctx->tl4_schq = schq; + smq_tree_ctx->cir_off = NIX_AF_TL4X_CIR(schq); + smq_tree_ctx->pir_off = NIX_AF_TL4X_PIR(schq); + parent_off = NIX_AF_TL4X_PARENT(schq); + } else if (lvl == NIX_TXSCH_LVL_MDQ) { + smq_tree_ctx->cir_off = NIX_AF_MDQX_CIR(schq); + smq_tree_ctx->pir_off = NIX_AF_MDQX_PIR(schq); + parent_off = NIX_AF_MDQX_PARENT(schq); + } + /* save cir/pir register values */ + smq_tree_ctx->cir_val = rvu_read64(rvu, blkaddr, smq_tree_ctx->cir_off); + if (smq_tree_ctx->pir_off) + smq_tree_ctx->pir_val = rvu_read64(rvu, blkaddr, smq_tree_ctx->pir_off); + + /* get parent txsch node */ + if (parent_off) { + regval = rvu_read64(rvu, blkaddr, parent_off); + schq = (regval >> 16) & 0x1FF; + } + } +} + +static void nix_dump_smq_status(struct rvu *rvu, int blkaddr, struct nix_smq_flush_ctx *ctx) +{ + dev_info(rvu->dev, "smq:%d tl1_schq:%d tl2:%d tl3:%d 
tl4:%d\n", ctx->smq, ctx->tl1_schq, + ctx->tl2_schq, ctx->tl3_schq, ctx->tl4_schq); + + dev_info(rvu->dev, "NIX_AF_SMQX_CFG:0x%llx\n", + rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(ctx->smq))); + dev_info(rvu->dev, "NIX_AF_SMQX_STATUS:0x%llx\n", + rvu_read64(rvu, blkaddr, NIX_AF_SMQX_STATUS(ctx->smq))); + dev_info(rvu->dev, "NIX_AF_MDQX_MD_COUNT:0x%llx\n", + rvu_read64(rvu, blkaddr, NIX_AF_MDQX_MD_COUNT)); + dev_info(rvu->dev, "NIX_AF_MDQX_IN_MD_COUNT:0x%llx\n", + rvu_read64(rvu, blkaddr, NIX_AF_MDQX_IN_MD_COUNT(ctx->smq))); + dev_info(rvu->dev, "NIX_AF_MDQX_OUT_MD_COUNT:0x%llx\n", + rvu_read64(rvu, blkaddr, NIX_AF_MDQX_OUT_MD_COUNT(ctx->smq))); + dev_info(rvu->dev, "NIX_AF_TL1X_SW_XOFF:0x%llx\n", + rvu_read64(rvu, blkaddr, NIX_AF_TL1X_SW_XOFF(ctx->tl1_schq))); + dev_info(rvu->dev, "NIX_AF_TL2X_SW_XOFF=0x%llx\n", + rvu_read64(rvu, blkaddr, NIX_AF_TL2X_SW_XOFF(ctx->tl2_schq))); +} + +static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr, + struct nix_smq_flush_ctx *smq_flush_ctx, bool enable) +{ + struct nix_txsch *txsch; + struct nix_hw *nix_hw; + u64 regoff; + int tl2; + + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return; + + /* loop through all TL2s with matching PF_FUNC */ + txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL2]; + for (tl2 = 0; tl2 < txsch->schq.max; tl2++) { + /* skip the smq(flush) TL2 */ + if (tl2 == smq_flush_ctx->tl2_schq) + continue; + /* skip unused TL2s */ + if (TXSCH_MAP_FLAGS(txsch->pfvf_map[tl2]) & NIX_TXSCHQ_FREE) + continue; + /* skip if PF_FUNC doesn't match */ + if ((TXSCH_MAP_FUNC(txsch->pfvf_map[tl2]) & ~RVU_PFVF_FUNC_MASK) != + (TXSCH_MAP_FUNC(txsch->pfvf_map[smq_flush_ctx->tl2_schq] & + ~RVU_PFVF_FUNC_MASK))) + continue; + /* enable/disable XOFF */ + regoff = NIX_AF_TL2X_SW_XOFF(tl2); + if (enable) + rvu_write64(rvu, blkaddr, regoff, 0x1); + else + rvu_write64(rvu, blkaddr, regoff, 0x0); + } +} + +static void nix_smq_flush_enadis_rate(struct rvu *rvu, int blkaddr, + struct nix_smq_flush_ctx *smq_flush_ctx, bool 
enable) +{ + u64 cir_off, pir_off, cir_val, pir_val; + struct nix_smq_tree_ctx *smq_tree_ctx; + int lvl; + + for (lvl = NIX_TXSCH_LVL_SMQ; lvl <= NIX_TXSCH_LVL_TL1; lvl++) { + smq_tree_ctx = &smq_flush_ctx->smq_tree_ctx[lvl]; + cir_off = smq_tree_ctx->cir_off; + cir_val = smq_tree_ctx->cir_val; + pir_off = smq_tree_ctx->pir_off; + pir_val = smq_tree_ctx->pir_val; + + if (enable) { + rvu_write64(rvu, blkaddr, cir_off, cir_val); + if (lvl != NIX_TXSCH_LVL_TL1) + rvu_write64(rvu, blkaddr, pir_off, pir_val); + } else { + rvu_write64(rvu, blkaddr, cir_off, 0x0); + if (lvl != NIX_TXSCH_LVL_TL1) + rvu_write64(rvu, blkaddr, pir_off, 0x0); + } + } +} + static int nix_smq_flush(struct rvu *rvu, int blkaddr, int smq, u16 pcifunc, int nixlf) { + struct nix_smq_flush_ctx *smq_flush_ctx; int pf = rvu_get_pf(pcifunc); u8 cgx_id = 0, lmac_id = 0; int err, restore_tx_en = 0; u64 cfg; + u8 link; + + if (!is_rvu_otx2(rvu)) { + /* Skip SMQ flush if pkt count is zero */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_MDQX_IN_MD_COUNT(smq)); + if (!cfg) + return 0; + } /* enable cgx tx if disabled */ if (is_pf_cgxmapped(rvu, pf)) { @@ -2096,6 +2654,14 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, lmac_id, true); } + /* XOFF all TL2s whose parent TL1 matches SMQ tree TL1 */ + smq_flush_ctx = kzalloc(sizeof(*smq_flush_ctx), GFP_KERNEL); + if (!smq_flush_ctx) + return -ENOMEM; + nix_smq_flush_fill_ctx(rvu, blkaddr, smq, smq_flush_ctx); + nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, true); + nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, false); + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq)); /* Do SMQ flush and set enqueue xoff */ cfg |= BIT_ULL(50) | BIT_ULL(49); @@ -2109,14 +2675,27 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, /* Wait for flush to complete */ err = rvu_poll_reg(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), BIT_ULL(49), true); - if (err) - dev_err(rvu->dev, - "NIXLF%d: SMQ%d flush failed\n", nixlf, smq); + if (err) { + dev_info(rvu->dev, + 
"NIXLF%d: SMQ%d flush failed, txlink might be busy\n", + nixlf, smq); + + nix_dump_smq_status(rvu, blkaddr, smq_flush_ctx); + link = (cgx_id * rvu->hw->lmac_per_cgx) + lmac_id; + dev_info(rvu->dev, "NIX_AF_TX_LINKX_NORM_CREDIT:0x%llx\n", + rvu_read64(rvu, blkaddr, NIX_AF_TX_LINKX_NORM_CREDIT(link))); + } + + /* clear XOFF on TL2s */ + nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, true); + nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, false); + kfree(smq_flush_ctx); rvu_cgx_enadis_rx_bp(rvu, pf, true); /* restore cgx tx state */ if (restore_tx_en) rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false); + return err; } @@ -2153,6 +2732,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc) continue; nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq); nix_clear_tx_xoff(rvu, blkaddr, lvl, schq); + nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq); } } nix_clear_tx_xoff(rvu, blkaddr, NIX_TXSCH_LVL_TL1, @@ -2191,15 +2771,14 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc) for (schq = 0; schq < txsch->schq.max; schq++) { if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc) continue; + nix_reset_tx_schedule(rvu, blkaddr, lvl, schq); rvu_free_rsrc(&txsch->schq, schq); txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE); } } mutex_unlock(&rvu->rsrc_lock); - /* Sync cached info for this LF in NDC-TX to LLC/DRAM */ - rvu_write64(rvu, blkaddr, NIX_AF_NDC_TX_SYNC, BIT_ULL(12) | nixlf); - err = rvu_poll_reg(rvu, blkaddr, NIX_AF_NDC_TX_SYNC, BIT_ULL(12), true); + err = rvu_ndc_sync(rvu, blkaddr, nixlf, NIX_AF_NDC_TX_SYNC); if (err) dev_err(rvu->dev, "NDC-TX sync failed for NIXLF %d\n", nixlf); @@ -2250,6 +2829,9 @@ static int nix_txschq_free_one(struct rvu *rvu, */ nix_clear_tx_xoff(rvu, blkaddr, lvl, schq); + nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq); + nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq); + /* Flush if it is a SMQ. 
Onus of disabling * TL2/3 queue links before SMQ flush is on user */ @@ -2259,6 +2841,8 @@ static int nix_txschq_free_one(struct rvu *rvu, goto err; } + nix_reset_tx_schedule(rvu, blkaddr, lvl, schq); + /* Free the resource */ rvu_free_rsrc(&txsch->schq, schq); txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE); @@ -2361,7 +2945,9 @@ static bool is_txschq_shaping_valid(struct rvu_hwinfo *hw, int lvl, u64 reg) static void nix_tl1_default_cfg(struct rvu *rvu, struct nix_hw *nix_hw, u16 pcifunc, int blkaddr) { + struct rvu_pfvf *parent_pf = &rvu->pf[rvu_get_pf(pcifunc)]; u32 *pfvf_map; + int schq; schq = nix_get_tx_link(rvu, pcifunc); @@ -2370,7 +2956,7 @@ static void nix_tl1_default_cfg(struct rvu *rvu, struct nix_hw *nix_hw, if (TXSCH_MAP_FLAGS(pfvf_map[schq]) & NIX_TXSCHQ_CFG_DONE) return; rvu_write64(rvu, blkaddr, NIX_AF_TL1X_TOPOLOGY(schq), - (TXSCH_TL1_DFLT_RR_PRIO << 1)); + (parent_pf->tl1_rr_prio << 1)); /* On OcteonTx2 the config was in bytes and newer silcons * it's changed to weight. @@ -2413,17 +2999,19 @@ static int nix_txschq_cfg_read(struct rvu *rvu, struct nix_hw *nix_hw, return 0; } -static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, - u16 pcifunc, struct nix_txsch *txsch) +void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, u16 pcifunc, + struct nix_txsch *txsch, bool enable) { struct rvu_hwinfo *hw = rvu->hw; int lbk_link_start, lbk_links; u8 pf = rvu_get_pf(pcifunc); int schq; + u64 cfg; if (!is_pf_cgxmapped(rvu, pf)) return; + cfg = enable ? 
(BIT_ULL(12) | RVU_SWITCH_LBK_CHAN) : 0; lbk_link_start = hw->cgx_links; for (schq = 0; schq < txsch->schq.max; schq++) { @@ -2437,8 +3025,7 @@ static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, rvu_write64(rvu, blkaddr, NIX_AF_TL3_TL2X_LINKX_CFG(schq, lbk_link_start + - lbk_links), - BIT_ULL(12) | RVU_SWITCH_LBK_CHAN); + lbk_links), cfg); } } @@ -2544,8 +3131,6 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu, rvu_write64(rvu, blkaddr, reg, regval); } - rvu_nix_tx_tl2_cfg(rvu, blkaddr, pcifunc, - &nix_hw->txsch[NIX_TXSCH_LVL_TL2]); return 0; } @@ -2558,8 +3143,8 @@ static int nix_rx_vtag_cfg(struct rvu *rvu, int nixlf, int blkaddr, req->vtag_size > VTAGSIZE_T8) return -EINVAL; - /* RX VTAG Type 7 reserved for vf vlan */ - if (req->rx.vtag_type == NIX_AF_LFX_RX_VTAG_TYPE7) + /* RX VTAG Type 7,6 are reserved for vf vlan& FDSA tag strip */ + if (req->rx.vtag_type >= NIX_AF_LFX_RX_VTAG_TYPE6) return NIX_AF_ERR_RX_VTAG_INUSE; if (req->rx.capture_vtag) @@ -2765,7 +3350,8 @@ int rvu_mbox_handler_nix_vtag_cfg(struct rvu *rvu, } static int nix_blk_setup_mce(struct rvu *rvu, struct nix_hw *nix_hw, - int mce, u8 op, u16 pcifunc, int next, bool eol) + int mce, u8 op, u16 pcifunc, int next, + int index, u8 mce_op, bool eol) { struct nix_aq_enq_req aq_req; int err; @@ -2776,8 +3362,8 @@ static int nix_blk_setup_mce(struct rvu *rvu, struct nix_hw *nix_hw, aq_req.qidx = mce; /* Use RSS with RSS index 0 */ - aq_req.mce.op = 1; - aq_req.mce.index = 0; + aq_req.mce.op = mce_op; + aq_req.mce.index = index; aq_req.mce.eol = eol; aq_req.mce.pf_func = pcifunc; aq_req.mce.next = next; @@ -2794,6 +3380,206 @@ static int nix_blk_setup_mce(struct rvu *rvu, struct nix_hw *nix_hw, return 0; } +static void nix_delete_mcast_mce_list(struct nix_mce_list *mce_list) +{ + struct hlist_node *tmp; + struct mce *mce; + + /* Scan through the current list */ + hlist_for_each_entry_safe(mce, tmp, &mce_list->head, node) { + hlist_del(&mce->node); + kfree(mce); + } + + mce_list->count = 0; 
+ mce_list->max = 0; +} + +static int nix_get_last_mce_list_index(struct nix_mcast_grp_elem *elem) +{ + return elem->mce_start_index + elem->mcast_mce_list.count - 1; +} + +static int nix_update_ingress_mce_list_hw(struct rvu *rvu, + struct nix_hw *nix_hw, + struct nix_mcast_grp_elem *elem) +{ + int idx, last_idx, next_idx, err; + struct nix_mce_list *mce_list; + struct mce *mce, *prev_mce; + + mce_list = &elem->mcast_mce_list; + idx = elem->mce_start_index; + last_idx = nix_get_last_mce_list_index(elem); + hlist_for_each_entry(mce, &mce_list->head, node) { + if (idx > last_idx) + break; + + if (!mce->is_active) { + if (idx == elem->mce_start_index) { + idx++; + prev_mce = mce; + elem->mce_start_index = idx; + continue; + } else if (idx == last_idx) { + err = nix_blk_setup_mce(rvu, nix_hw, idx - 1, NIX_AQ_INSTOP_WRITE, + prev_mce->pcifunc, next_idx, + prev_mce->rq_rss_index, + prev_mce->dest_type, + false); + if (err) + return err; + + break; + } + } + + next_idx = idx + 1; + /* EOL should be set in last MCE */ + err = nix_blk_setup_mce(rvu, nix_hw, idx, NIX_AQ_INSTOP_WRITE, + mce->pcifunc, next_idx, + mce->rq_rss_index, mce->dest_type, + (next_idx > last_idx) ? 
true : false); + if (err) + return err; + + idx++; + prev_mce = mce; + } + + return 0; +} + +static void nix_update_egress_mce_list_hw(struct rvu *rvu, + struct nix_hw *nix_hw, + struct nix_mcast_grp_elem *elem) +{ + struct nix_mce_list *mce_list; + int idx, last_idx, next_idx; + struct mce *mce, *prev_mce; + u64 regval; + u8 eol; + + mce_list = &elem->mcast_mce_list; + idx = elem->mce_start_index; + last_idx = nix_get_last_mce_list_index(elem); + hlist_for_each_entry(mce, &mce_list->head, node) { + if (idx > last_idx) + break; + + if (!mce->is_active) { + if (idx == elem->mce_start_index) { + idx++; + prev_mce = mce; + elem->mce_start_index = idx; + continue; + } else if (idx == last_idx) { + regval = (next_idx << 16) | (1 << 12) | prev_mce->channel; + rvu_write64(rvu, nix_hw->blkaddr, + NIX_AF_TX_MCASTX(idx - 1), + regval); + break; + } + } + + eol = 0; + next_idx = idx + 1; + /* EOL should be set in last MCE */ + if (next_idx > last_idx) + eol = 1; + + regval = (next_idx << 16) | (eol << 12) | mce->channel; + rvu_write64(rvu, nix_hw->blkaddr, + NIX_AF_TX_MCASTX(idx), + regval); + idx++; + prev_mce = mce; + } +} + +static int nix_del_mce_list_entry(struct rvu *rvu, + struct nix_hw *nix_hw, + struct nix_mcast_grp_elem *elem, + struct nix_mcast_grp_update_req *req) +{ + u32 num_entry = req->num_mce_entry; + struct nix_mce_list *mce_list; + struct mce *mce; + bool is_found; + int i; + + mce_list = &elem->mcast_mce_list; + for (i = 0; i < num_entry; i++) { + is_found = false; + hlist_for_each_entry(mce, &mce_list->head, node) { + /* If already exists, then delete */ + if (mce->pcifunc == req->pcifunc[i]) { + hlist_del(&mce->node); + kfree(mce); + mce_list->count--; + is_found = true; + break; + } + } + + if (!is_found) + return NIX_AF_ERR_INVALID_MCAST_DEL_REQ; + } + + mce_list->max = mce_list->count; + /* Dump the updated list to HW */ + if (elem->dir == NIX_MCAST_INGRESS) + return nix_update_ingress_mce_list_hw(rvu, nix_hw, elem); + + 
nix_update_egress_mce_list_hw(rvu, nix_hw, elem); + return 0; +} + +static int nix_add_mce_list_entry(struct rvu *rvu, + struct nix_hw *nix_hw, + struct nix_mcast_grp_elem *elem, + struct nix_mcast_grp_update_req *req) +{ + u32 num_entry = req->num_mce_entry; + struct nix_mce_list *mce_list; + struct hlist_node *tmp; + struct mce *mce; + int i; + + mce_list = &elem->mcast_mce_list; + for (i = 0; i < num_entry; i++) { + mce = kzalloc(sizeof(*mce), GFP_KERNEL); + if (!mce) + goto free_mce; + + mce->pcifunc = req->pcifunc[i]; + mce->channel = req->channel[i]; + mce->rq_rss_index = req->rq_rss_index[i]; + mce->dest_type = req->dest_type[i]; + mce->is_active = 1; + hlist_add_head(&mce->node, &mce_list->head); + mce_list->count++; + } + + mce_list->max += num_entry; + + /* Dump the updated list to HW */ + if (elem->dir == NIX_MCAST_INGRESS) + return nix_update_ingress_mce_list_hw(rvu, nix_hw, elem); + + nix_update_egress_mce_list_hw(rvu, nix_hw, elem); + return 0; + +free_mce: + hlist_for_each_entry_safe(mce, tmp, &mce_list->head, node) { + hlist_del(&mce->node); + kfree(mce); + mce_list->count--; + } + + return -ENOMEM; +} + static int nix_update_mce_list_entry(struct nix_mce_list *mce_list, u16 pcifunc, bool add) { @@ -2889,6 +3675,7 @@ int nix_update_mce_list(struct rvu *rvu, u16 pcifunc, /* EOL should be set in last MCE */ err = nix_blk_setup_mce(rvu, nix_hw, idx, NIX_AQ_INSTOP_WRITE, mce->pcifunc, next_idx, + 0, 1, (next_idx > last_idx) ? 
true : false); if (err) goto end; @@ -2969,6 +3756,16 @@ static int nix_update_mce_rule(struct rvu *rvu, u16 pcifunc, return err; } +static void nix_setup_mcast_grp(struct nix_hw *nix_hw) +{ + struct nix_mcast_grp *mcast_grp = &nix_hw->mcast_grp; + + INIT_LIST_HEAD(&mcast_grp->mcast_grp_head); + mutex_init(&mcast_grp->mcast_grp_lock); + mcast_grp->next_grp_index = 1; + mcast_grp->count = 0; +} + static int nix_setup_mce_tables(struct rvu *rvu, struct nix_hw *nix_hw) { struct nix_mcast *mcast = &nix_hw->mcast; @@ -2993,15 +3790,15 @@ static int nix_setup_mce_tables(struct rvu *rvu, struct nix_hw *nix_hw) continue; /* save start idx of broadcast mce list */ - pfvf->bcast_mce_idx = nix_alloc_mce_list(mcast, numvfs + 1); + pfvf->bcast_mce_idx = nix_alloc_mce_list(mcast, numvfs + 1, NIX_MCAST_INGRESS); nix_mce_list_init(&pfvf->bcast_mce_list, numvfs + 1); /* save start idx of multicast mce list */ - pfvf->mcast_mce_idx = nix_alloc_mce_list(mcast, numvfs + 1); + pfvf->mcast_mce_idx = nix_alloc_mce_list(mcast, numvfs + 1, NIX_MCAST_INGRESS); nix_mce_list_init(&pfvf->mcast_mce_list, numvfs + 1); /* save the start idx of promisc mce list */ - pfvf->promisc_mce_idx = nix_alloc_mce_list(mcast, numvfs + 1); + pfvf->promisc_mce_idx = nix_alloc_mce_list(mcast, numvfs + 1, NIX_MCAST_INGRESS); nix_mce_list_init(&pfvf->promisc_mce_list, numvfs + 1); for (idx = 0; idx < (numvfs + 1); idx++) { @@ -3016,7 +3813,7 @@ static int nix_setup_mce_tables(struct rvu *rvu, struct nix_hw *nix_hw) err = nix_blk_setup_mce(rvu, nix_hw, pfvf->bcast_mce_idx + idx, NIX_AQ_INSTOP_INIT, - pcifunc, 0, true); + pcifunc, 0, 0, 1, true); if (err) return err; @@ -3024,7 +3821,7 @@ static int nix_setup_mce_tables(struct rvu *rvu, struct nix_hw *nix_hw) err = nix_blk_setup_mce(rvu, nix_hw, pfvf->mcast_mce_idx + idx, NIX_AQ_INSTOP_INIT, - pcifunc, 0, true); + pcifunc, 0, 0, 1, true); if (err) return err; @@ -3032,7 +3829,7 @@ static int nix_setup_mce_tables(struct rvu *rvu, struct nix_hw *nix_hw) err = 
nix_blk_setup_mce(rvu, nix_hw, pfvf->promisc_mce_idx + idx, NIX_AQ_INSTOP_INIT, - pcifunc, 0, true); + pcifunc, 0, 0, 1, true); if (err) return err; } @@ -3047,11 +3844,25 @@ static int nix_setup_mcast(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr) int err, size; size = (rvu_read64(rvu, blkaddr, NIX_AF_CONST3) >> 16) & 0x0F; - size = (1ULL << size); + size = BIT_ULL(size); + + /* Allocate bitmap for rx mce entries */ + mcast->mce_counter[NIX_MCAST_INGRESS].max = 256UL << MC_TBL_SIZE; + err = rvu_alloc_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]); + if (err) + return -ENOMEM; + + /* Allocate bitmap for tx mce entries */ + mcast->mce_counter[NIX_MCAST_EGRESS].max = MC_TX_MAX; + err = rvu_alloc_bitmap(&mcast->mce_counter[NIX_MCAST_EGRESS]); + if (err) { + rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]); + return -ENOMEM; + } /* Alloc memory for multicast/mirror replication entries */ err = qmem_alloc(rvu->dev, &mcast->mce_ctx, - (256UL << MC_TBL_SIZE), size); + mcast->mce_counter[NIX_MCAST_INGRESS].max, size); if (err) return -ENOMEM; @@ -3081,6 +3892,8 @@ static int nix_setup_mcast(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr) mutex_init(&mcast->mce_lock); + nix_setup_mcast_grp(nix_hw); + return nix_setup_mce_tables(rvu, nix_hw); } @@ -3156,10 +3969,16 @@ static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr) } /* Setup a default value of 8192 as DWRR MTU */ - if (rvu->hw->cap.nix_common_dwrr_mtu) { - rvu_write64(rvu, blkaddr, NIX_AF_DWRR_RPM_MTU, + if (rvu->hw->cap.nix_common_dwrr_mtu || + rvu->hw->cap.nix_multiple_dwrr_mtu) { + rvu_write64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_RPM), + convert_bytes_to_dwrr_mtu(8192)); + rvu_write64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_LBK), convert_bytes_to_dwrr_mtu(8192)); - rvu_write64(rvu, blkaddr, NIX_AF_DWRR_SDP_MTU, + rvu_write64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_SDP), convert_bytes_to_dwrr_mtu(8192)); } @@ 
-3228,8 +4047,12 @@ static void rvu_get_lbk_link_max_frs(struct rvu *rvu, u16 *max_mtu) static void rvu_get_lmac_link_max_frs(struct rvu *rvu, u16 *max_mtu) { - /* RPM supports FIFO len 128 KB */ - if (rvu_cgx_get_fifolen(rvu) == 0x20000) + int fifo_size = rvu_cgx_get_fifolen(rvu); + + /* RPM supports FIFO len 128 KB and RPM2 supports double the + * FIFO len to accommodate 8 LMACS + */ + if (fifo_size == 0x20000 || fifo_size == 0x40000) *max_mtu = CN10K_LMAC_LINK_MAX_FRS; else *max_mtu = NIC_HW_MAX_FRS; @@ -3246,6 +4069,11 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req, if (blkaddr < 0) return NIX_AF_ERR_AF_LF_INVALID; + rsp->vwqe_delay = 0; + if (!is_rvu_otx2(rvu)) + rsp->vwqe_delay = rvu_read64(rvu, blkaddr, NIX_AF_VWQE_TIMER) & + GENMASK_ULL(9, 0); + if (is_afvf(pcifunc)) rvu_get_lbk_link_max_frs(rvu, &rsp->max_mtu); else @@ -3253,19 +4081,28 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req, rsp->min_mtu = NIC_HW_MIN_FRS; - if (!rvu->hw->cap.nix_common_dwrr_mtu) { + if (!rvu->hw->cap.nix_common_dwrr_mtu && + !rvu->hw->cap.nix_multiple_dwrr_mtu) { /* Return '1' on OTx2 */ rsp->rpm_dwrr_mtu = 1; rsp->sdp_dwrr_mtu = 1; + rsp->lbk_dwrr_mtu = 1; return 0; } - dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU); + /* Return DWRR_MTU for TLx_SCHEDULE[RR_WEIGHT] config */ + dwrr_mtu = rvu_read64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_RPM)); rsp->rpm_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu); - dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_SDP_MTU); + dwrr_mtu = rvu_read64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_SDP)); rsp->sdp_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu); + dwrr_mtu = rvu_read64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_LBK)); + rsp->lbk_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu); + return 0; } @@ -3314,6 +4151,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) struct 
nix_rx_flowkey_alg *field; struct nix_rx_flowkey_alg tmp; u32 key_type, valid_key; + u32 l3_l4_src_dst; int l4_key_offset = 0; if (!alg) @@ -3341,6 +4179,15 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) * group_member - Enabled when protocol is part of a group. */ + /* Last 4 bits (31:28) are reserved to specify SRC, DST + * selection for L3, L4 i.e IPV[4,6]_SRC, IPV[4,6]_DST, + * [TCP,UDP,SCTP]_SRC, [TCP,UDP,SCTP]_DST + * 31 => L3_SRC, 30 => L3_DST, 29 => L4_SRC, 28 => L4_DST + */ + l3_l4_src_dst = flow_cfg; + /* Reset these 4 bits, so that these won't be part of key */ + flow_cfg &= NIX_FLOW_KEY_TYPE_L3_L4_MASK; + keyoff_marker = 0; max_key_off = 0; group_member = 0; nr_field = 0; key_off = 0; field_marker = 1; field = &tmp; max_bit_pos = fls(flow_cfg); @@ -3378,6 +4225,22 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) } field->hdr_offset = 12; /* SIP offset */ field->bytesm1 = 7; /* SIP + DIP, 8 bytes */ + + /* Only SIP */ + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L3_SRC_ONLY) + field->bytesm1 = 3; /* SIP, 4 bytes */ + + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L3_DST_ONLY) { + /* Both SIP + DIP */ + if (field->bytesm1 == 3) { + field->bytesm1 = 7; /* SIP + DIP, 8B */ + } else { + /* Only DIP */ + field->hdr_offset = 16; /* DIP off */ + field->bytesm1 = 3; /* DIP, 4 bytes */ + } + } + field->ltype_mask = 0xF; /* Match only IPv4 */ keyoff_marker = false; break; @@ -3391,7 +4254,23 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) } field->hdr_offset = 8; /* SIP offset */ field->bytesm1 = 31; /* SIP + DIP, 32 bytes */ - field->ltype_mask = 0xF; /* Match only IPv6 */ + + /* Only SIP */ + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L3_SRC_ONLY) + field->bytesm1 = 15; /* SIP, 16 bytes */ + + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L3_DST_ONLY) { + /* Both SIP + DIP */ + if (field->bytesm1 == 15) { + /* SIP + DIP, 32 bytes */ + field->bytesm1 = 31; + } else { + /* Only DIP */ + 
field->hdr_offset = 24; /* DIP off */ + field->bytesm1 = 15; /* DIP,16 bytes */ + } + } + field->ltype_mask = 0xE; /* Match IPv6 and IPv6_ext */ break; case NIX_FLOW_KEY_TYPE_TCP: case NIX_FLOW_KEY_TYPE_UDP: @@ -3406,6 +4285,21 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->lid = NPC_LID_LH; field->bytesm1 = 3; /* Sport + Dport, 4 bytes */ + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L4_SRC_ONLY) + field->bytesm1 = 1; /* SRC, 2 bytes */ + + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L4_DST_ONLY) { + /* Both SRC + DST */ + if (field->bytesm1 == 1) { + /* SRC + DST, 4 bytes */ + field->bytesm1 = 3; + } else { + /* Only DIP */ + field->hdr_offset = 2; /* DST off */ + field->bytesm1 = 1; /* DST, 2 bytes */ + } + } + /* Enum values for NPC_LID_LD and NPC_LID_LG are same, * so no need to change the ltype_match, just change * the lid for inner protocols @@ -3516,6 +4410,20 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->ltype_match = NPC_LT_LE_GTPU; field->ltype_mask = 0xF; break; + case NIX_FLOW_KEY_TYPE_CH_LEN_90B: + field->lid = NPC_LID_LA; + field->hdr_offset = 24; + field->bytesm1 = 1; /* 2 Bytes*/ + field->ltype_match = NPC_LT_LA_CUSTOM_L2_90B_ETHER; + field->ltype_mask = 0xF; + break; + case NIX_FLOW_KEY_TYPE_CUSTOM0: + field->lid = NPC_LID_LC; + field->hdr_offset = 6; + field->bytesm1 = 1; /* 2 Bytes*/ + field->ltype_match = NPC_LT_LC_CUSTOM0; + field->ltype_mask = 0xF; + break; case NIX_FLOW_KEY_TYPE_VLAN: field->lid = NPC_LID_LB; field->hdr_offset = 2; /* Skip TPID (2-bytes) */ @@ -3708,7 +4616,7 @@ int rvu_mbox_handler_nix_set_mac_addr(struct rvu *rvu, struct nix_set_mac_addr *req, struct msg_rsp *rsp) { - bool from_vf = req->hdr.pcifunc & RVU_PFVF_FUNC_MASK; + bool from_vf = !!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK); u16 pcifunc = req->hdr.pcifunc; int blkaddr, nixlf, err; struct rvu_pfvf *pfvf; @@ -3820,14 +4728,13 @@ int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode 
*req, } /* install/uninstall promisc entry */ - if (promisc) { + if (promisc) rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, pfvf->rx_chan_base, pfvf->rx_chan_cnt); - } else { + else if (!nix_rx_multicast) rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf, false); - } return 0; } @@ -3906,7 +4813,7 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req, if (!req->sdp_link && req->maxlen > max_mtu) return NIX_AF_ERR_FRS_INVALID; - if (req->update_minlen && req->minlen < NIC_HW_MIN_FRS) + if (req->update_minlen && req->minlen < (req->sdp_link ? SDP_HW_MIN_FRS : NIC_HW_MIN_FRS)) return NIX_AF_ERR_FRS_INVALID; /* Check if config is for SDP link */ @@ -3965,6 +4872,11 @@ int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req, else cfg &= ~BIT_ULL(40); + if (req->len_verify & NIX_RX_DROP_RE) + cfg |= BIT_ULL(32); + else + cfg &= ~BIT_ULL(32); + if (req->csum_verify & BIT(0)) cfg |= BIT_ULL(37); else @@ -3994,6 +4906,9 @@ static void nix_link_config(struct rvu *rvu, int blkaddr, rvu_get_lbk_link_max_frs(rvu, &lbk_max_frs); rvu_get_lmac_link_max_frs(rvu, &lmac_max_frs); + /* Set SDP link credit */ + rvu_write64(rvu, blkaddr, NIX_AF_SDP_LINK_CREDIT, SDP_LINK_CREDIT); + /* Set default min/max packet lengths allowed on NIX Rx links. 
* * With HW reset minlen value of 60byte, HW will treat ARP pkts @@ -4005,14 +4920,30 @@ static void nix_link_config(struct rvu *rvu, int blkaddr, ((u64)lmac_max_frs << 16) | NIC_HW_MIN_FRS); } - for (link = hw->cgx_links; link < hw->lbk_links; link++) { + for (link = hw->cgx_links; link < hw->cgx_links + hw->lbk_links; link++) { rvu_write64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link), ((u64)lbk_max_frs << 16) | NIC_HW_MIN_FRS); } if (hw->sdp_links) { link = hw->cgx_links + hw->lbk_links; rvu_write64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link), - SDP_HW_MAX_FRS << 16 | NIC_HW_MIN_FRS); + SDP_HW_MAX_FRS << 16 | SDP_HW_MIN_FRS); + } + + /* Set CPT link i.e second pass config */ + if (hw->cpt_links) { + link = hw->cgx_links + hw->lbk_links + hw->sdp_links; + /* Set default min/max packet lengths allowed to LBK as that + * LBK link's range is max. + */ + rvu_write64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link), + ((u64)lbk_max_frs << 16) | NIC_HW_MIN_FRS); + } + + /* Get MCS external bypass status for CN10K-B */ + if (mcs_get_blkcnt() == 1) { + /* Adjust for 2 credits when external bypass is disabled */ + nix_hw->cc_mcs_cnt = is_mcs_bypass(0) ? 0 : 2; } /* Set credits for Tx links assuming max packet length allowed. 
@@ -4027,7 +4958,7 @@ static void nix_link_config(struct rvu *rvu, int blkaddr, /* Get LMAC id's from bitmap */ lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu)); - for_each_set_bit(iter, &lmac_bmap, MAX_LMAC_PER_CGX) { + for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) { lmac_fifo_len = rvu_cgx_get_lmac_fifolen(rvu, cgx, iter); if (!lmac_fifo_len) { dev_err(rvu->dev, @@ -4038,6 +4969,7 @@ static void nix_link_config(struct rvu *rvu, int blkaddr, tx_credits = (lmac_fifo_len - lmac_max_frs) / 16; /* Enable credits and set credit pkt count to max allowed */ cfg = (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1); + cfg |= (nix_hw->cc_mcs_cnt << 32); link = iter + slink; nix_hw->tx_credits[link] = tx_credits; @@ -4162,8 +5094,11 @@ static void rvu_nix_setup_capabilities(struct rvu *rvu, int blkaddr) * Check if HW uses a common MTU for all DWRR quantum configs. * On OcteonTx2 this register field is '0'. */ - if (((hw_const >> 56) & 0x10) == 0x10) + if ((((hw_const >> 56) & 0x10) == 0x10) && !(hw_const & BIT_ULL(61))) hw->cap.nix_common_dwrr_mtu = true; + + if (hw_const & BIT_ULL(61)) + hw->cap.nix_multiple_dwrr_mtu = true; } static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) @@ -4184,18 +5119,24 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) */ rvu_write64(rvu, blkaddr, NIX_AF_CFG, rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x40ULL); + } - /* Set chan/link to backpressure TL3 instead of TL2 */ - rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01); + /* Set chan/link to backpressure TL3 instead of TL2 */ + rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01); - /* Disable SQ manager's sticky mode operation (set TM6 = 0) - * This sticky mode is known to cause SQ stalls when multiple - * SQs are mapped to same SMQ and transmitting pkts at a time. 
- */ - cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS); - cfg &= ~BIT_ULL(15); - rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg); - } + /* Disable SQ manager's sticky mode operation (set TM6 = 0, TM11 = 0) + * This sticky mode is known to cause SQ stalls when multiple + * SQs are mapped to same SMQ and transmitting pkts simultaneously. + * NIX PSE may dead lock when therea are any sticky to non-sticky + * transmission. Hence disable it (TM5 = 0). + */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS); + cfg &= ~(BIT_ULL(15) | BIT_ULL(14) | BIT_ULL(23)); + /* NIX may drop credits when condition clocks are turned off. + * Hence enable control flow clk (set TM9 = 1). + */ + cfg |= BIT_ULL(21); + rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg); ltdefs = rvu->kpu.lt_def; /* Calibrate X2P bus to check if CGX/LBK links are fine */ @@ -4214,8 +5155,17 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) /* Restore CINT timer delay to HW reset values */ rvu_write64(rvu, blkaddr, NIX_AF_CINT_DELAY, 0x0ULL); + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SEB_CFG); + /* For better performance use NDC TX instead of NDC RX for SQ's SQEs" */ - rvu_write64(rvu, blkaddr, NIX_AF_SEB_CFG, 0x1ULL); + cfg |= 1ULL; + if (!is_rvu_otx2(rvu)) + cfg |= NIX_PTP_1STEP_EN; + + rvu_write64(rvu, blkaddr, NIX_AF_SEB_CFG, cfg); + + if (!is_rvu_otx2(rvu)) + rvu_nix_block_cn10k_init(rvu, nix_hw); if (is_block_implemented(hw, blkaddr)) { err = nix_setup_txschq(rvu, nix_hw, blkaddr); @@ -4238,6 +5188,10 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) if (err) return err; + err = nix_setup_bpids(rvu, nix_hw, blkaddr); + if (err) + return err; + /* Configure segmentation offload formats */ nix_setup_lso(rvu, nix_hw, blkaddr); @@ -4327,6 +5281,19 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) /* Enable Channel backpressure */ rvu_write64(rvu, blkaddr, NIX_AF_RX_CFG, BIT_ULL(0)); + if 
(is_block_implemented(rvu->hw, BLKADDR_CPT0)) { + /* Config IPSec headers identification */ + rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_IPSECX(0), + (ltdefs->rx_ipsec[0].lid << 8) | + (ltdefs->rx_ipsec[0].ltype_match << 4) | + ltdefs->rx_ipsec[0].ltype_mask); + + rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_IPSECX(1), + (ltdefs->rx_ipsec[1].spi_offset << 12) | + (ltdefs->rx_ipsec[1].lid << 8) | + (ltdefs->rx_ipsec[1].ltype_match << 4) | + ltdefs->rx_ipsec[1].ltype_mask); + } } return 0; } @@ -4408,6 +5375,74 @@ void rvu_nix_freemem(struct rvu *rvu) } } +static void nix_mcast_update_action(struct rvu *rvu, + struct nix_mcast_grp_elem *elem) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + struct nix_rx_action rx_action = { 0 }; + struct nix_tx_action tx_action = { 0 }; + int npc_blkaddr; + + npc_blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (elem->dir == NIX_MCAST_INGRESS) { + *(u64 *)&rx_action = npc_get_mcam_action(rvu, mcam, + npc_blkaddr, + elem->mcam_index); + rx_action.index = elem->mce_start_index; + npc_set_mcam_action(rvu, mcam, npc_blkaddr, elem->mcam_index, + *(u64 *)&rx_action); + } else { + *(u64 *)&tx_action = npc_get_mcam_action(rvu, mcam, + npc_blkaddr, + elem->mcam_index); + tx_action.index = elem->mce_start_index; + npc_set_mcam_action(rvu, mcam, npc_blkaddr, elem->mcam_index, + *(u64 *)&tx_action); + } +} + +static void nix_mcast_update_mce_entry(struct rvu *rvu, u16 pcifunc, u8 is_active) +{ + struct nix_mcast_grp_elem *elem; + struct nix_mcast_grp *mcast_grp; + struct nix_hw *nix_hw; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return; + + mcast_grp = &nix_hw->mcast_grp; + + mutex_lock(&mcast_grp->mcast_grp_lock); + list_for_each_entry(elem, &mcast_grp->mcast_grp_head, list) { + struct nix_mce_list *mce_list; + struct mce *mce; + + /* Iterate the group elements and disable the element which + * received the disable request. 
+ */ + mce_list = &elem->mcast_mce_list; + hlist_for_each_entry(mce, &mce_list->head, node) { + if (mce->pcifunc == pcifunc) { + mce->is_active = is_active; + break; + } + } + + /* Dump the updated list to HW */ + if (elem->dir == NIX_MCAST_INGRESS) + nix_update_ingress_mce_list_hw(rvu, nix_hw, elem); + else + nix_update_egress_mce_list_hw(rvu, nix_hw, elem); + + /* Update the multicast index in NPC rule */ + nix_mcast_update_action(rvu, elem); + } + mutex_unlock(&mcast_grp->mcast_grp_lock); +} + int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req *req, struct msg_rsp *rsp) { @@ -4419,6 +5454,9 @@ int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req *req, if (err) return err; + /* Enable the interface if it is in any multicast list */ + nix_mcast_update_mce_entry(rvu, pcifunc, 1); + rvu_npc_enable_default_entries(rvu, pcifunc, nixlf); npc_mcam_enable_flows(rvu, pcifunc); @@ -4443,6 +5481,9 @@ int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req, return err; rvu_npc_disable_mcam_entries(rvu, pcifunc, nixlf); + /* Disable the interface if it is in any multicast list */ + nix_mcast_update_mce_entry(rvu, pcifunc, 0); + pfvf = rvu_get_pfvf(rvu, pcifunc); clear_bit(NIXLF_INITIALIZED, &pfvf->flags); @@ -4456,6 +5497,8 @@ int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req, return 0; } +#define RX_SA_BASE GENMASK_ULL(52, 7) + void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); @@ -4463,6 +5506,7 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf) int pf = rvu_get_pf(pcifunc); struct mac_ops *mac_ops; u8 cgx_id, lmac_id; + u64 sa_base; void *cgxd; int err; @@ -4475,6 +5519,9 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf) nix_rx_sync(rvu, blkaddr); nix_txschq_free(rvu, pcifunc); + /* Reset SPI to SA index table */ + rvu_nix_free_spi_to_sa_table(rvu, pcifunc); 
+ clear_bit(NIXLF_INITIALIZED, &pfvf->flags); rvu_cgx_start_stop_io(rvu, pcifunc, false); @@ -4516,9 +5563,32 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf) pfvf->hw_rx_tstamp_en = false; } + /* reset priority flow control config */ + rvu_cgx_prio_flow_ctrl_cfg(rvu, pcifunc, 0, 0, 0); + + /* reset 802.3x flow control config */ + rvu_cgx_cfg_pause_frm(rvu, pcifunc, 0, 0); + nix_ctx_free(rvu, pfvf); nix_free_all_bandprof(rvu, pcifunc); + + sa_base = rvu_read64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_SA_BASE(nixlf)); + if (FIELD_GET(RX_SA_BASE, sa_base)) { + err = rvu_cpt_ctx_flush(rvu, pcifunc); + if (err) + dev_err(rvu->dev, + "CPT ctx flush failed with error: %d\n", err); + } + if (is_block_implemented(rvu->hw, BLKADDR_CPT0)) { + /* reset the configuration related to inline ipsec */ + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_CFG0(nixlf), + 0x0); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_CFG1(nixlf), + 0x0); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_SA_BASE(nixlf), + 0x0); + } } #define NIX_AF_LFX_TX_CFG_PTP_EN BIT_ULL(32) @@ -4559,6 +5629,10 @@ static int rvu_nix_lf_ptp_tx_cfg(struct rvu *rvu, u16 pcifunc, bool enable) int rvu_mbox_handler_nix_lf_ptp_tx_enable(struct rvu *rvu, struct msg_req *req, struct msg_rsp *rsp) { + /* Silicon does not support enabling time stamp in higig mode */ + if (rvu_cgx_is_higig2_enabled(rvu, rvu_get_pf(req->hdr.pcifunc))) + return NIX_AF_ERR_PTP_CONFIG_FAIL; + return rvu_nix_lf_ptp_tx_cfg(rvu, req->hdr.pcifunc, true); } @@ -4619,6 +5693,157 @@ int rvu_mbox_handler_nix_lso_format_cfg(struct rvu *rvu, return 0; } +#define IPSEC_GEN_CFG_EGRP GENMASK_ULL(50, 48) +#define IPSEC_GEN_CFG_OPCODE GENMASK_ULL(47, 32) +#define IPSEC_GEN_CFG_PARAM1 GENMASK_ULL(31, 16) +#define IPSEC_GEN_CFG_PARAM2 GENMASK_ULL(15, 0) + +#define CPT_INST_QSEL_BLOCK GENMASK_ULL(28, 24) +#define CPT_INST_QSEL_PF_FUNC GENMASK_ULL(23, 8) +#define CPT_INST_QSEL_SLOT GENMASK_ULL(7, 0) + +#define CPT_INST_CREDIT_TH 
GENMASK_ULL(53, 32) +#define CPT_INST_CREDIT_BPID GENMASK_ULL(30, 22) +#define CPT_INST_CREDIT_CNT GENMASK_ULL(21, 0) + +static void nix_inline_ipsec_cfg(struct rvu *rvu, struct nix_inline_ipsec_cfg *req, + int blkaddr) +{ + u8 cpt_idx, cpt_blkaddr; + u64 val = 0; + + cpt_idx = (blkaddr == BLKADDR_NIX0) ? 0 : 1; + if (req->enable) { + val = 0; + /* Enable context prefetching */ + if (!is_rvu_otx2(rvu)) + val |= BIT_ULL(51); + + /* Set OPCODE and EGRP */ + val |= FIELD_PREP(IPSEC_GEN_CFG_EGRP, req->gen_cfg.egrp); + val |= FIELD_PREP(IPSEC_GEN_CFG_OPCODE, req->gen_cfg.opcode); + val |= FIELD_PREP(IPSEC_GEN_CFG_PARAM1, req->gen_cfg.param1); + val |= FIELD_PREP(IPSEC_GEN_CFG_PARAM2, req->gen_cfg.param2); + + rvu_write64(rvu, blkaddr, NIX_AF_RX_IPSEC_GEN_CFG, val); + + /* Set CPT queue for inline IPSec */ + val = FIELD_PREP(CPT_INST_QSEL_SLOT, req->inst_qsel.cpt_slot); + val |= FIELD_PREP(CPT_INST_QSEL_PF_FUNC, + req->inst_qsel.cpt_pf_func); + + if (!is_rvu_otx2(rvu)) { + cpt_blkaddr = (cpt_idx == 0) ? 
BLKADDR_CPT0 : + BLKADDR_CPT1; + val |= FIELD_PREP(CPT_INST_QSEL_BLOCK, cpt_blkaddr); + } + + rvu_write64(rvu, blkaddr, NIX_AF_RX_CPTX_INST_QSEL(cpt_idx), + val); + + /* Set CPT credit */ + val = rvu_read64(rvu, blkaddr, NIX_AF_RX_CPTX_CREDIT(cpt_idx)); + if ((val & 0x3FFFFF) != 0x3FFFFF) + rvu_write64(rvu, blkaddr, NIX_AF_RX_CPTX_CREDIT(cpt_idx), + 0x3FFFFF - val); + + val = FIELD_PREP(CPT_INST_CREDIT_CNT, req->cpt_credit); + val |= FIELD_PREP(CPT_INST_CREDIT_BPID, req->bpid); + val |= FIELD_PREP(CPT_INST_CREDIT_TH, req->credit_th); + rvu_write64(rvu, blkaddr, NIX_AF_RX_CPTX_CREDIT(cpt_idx), val); + } else { + rvu_write64(rvu, blkaddr, NIX_AF_RX_IPSEC_GEN_CFG, 0x0); + rvu_write64(rvu, blkaddr, NIX_AF_RX_CPTX_INST_QSEL(cpt_idx), + 0x0); + val = rvu_read64(rvu, blkaddr, NIX_AF_RX_CPTX_CREDIT(cpt_idx)); + if ((val & 0x3FFFFF) != 0x3FFFFF) + rvu_write64(rvu, blkaddr, NIX_AF_RX_CPTX_CREDIT(cpt_idx), + 0x3FFFFF - val); + } +} + +int rvu_mbox_handler_nix_inline_ipsec_cfg(struct rvu *rvu, + struct nix_inline_ipsec_cfg *req, + struct msg_rsp *rsp) +{ + if (!is_block_implemented(rvu->hw, BLKADDR_CPT0)) + return 0; + + nix_inline_ipsec_cfg(rvu, req, BLKADDR_NIX0); + if (is_block_implemented(rvu->hw, BLKADDR_CPT1)) + nix_inline_ipsec_cfg(rvu, req, BLKADDR_NIX1); + + return 0; +} + +int rvu_mbox_handler_nix_read_inline_ipsec_cfg(struct rvu *rvu, + struct msg_req *req, + struct nix_inline_ipsec_cfg *rsp) + +{ + u64 val; + + if (!is_block_implemented(rvu->hw, BLKADDR_CPT0)) + return 0; + + val = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_RX_IPSEC_GEN_CFG); + rsp->gen_cfg.egrp = FIELD_GET(IPSEC_GEN_CFG_EGRP, val); + rsp->gen_cfg.opcode = FIELD_GET(IPSEC_GEN_CFG_OPCODE, val); + rsp->gen_cfg.param1 = FIELD_GET(IPSEC_GEN_CFG_PARAM1, val); + rsp->gen_cfg.param2 = FIELD_GET(IPSEC_GEN_CFG_PARAM2, val); + + val = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_RX_CPTX_CREDIT(0)); + rsp->cpt_credit = FIELD_GET(CPT_INST_CREDIT_CNT, val); + rsp->credit_th = FIELD_GET(CPT_INST_CREDIT_TH, val); + rsp->bpid = 
FIELD_GET(CPT_INST_CREDIT_BPID, val); + + return 0; +} + +int rvu_mbox_handler_nix_inline_ipsec_lf_cfg(struct rvu *rvu, + struct nix_inline_ipsec_lf_cfg *req, + struct msg_rsp *rsp) +{ + int lf, blkaddr, err; + u64 val; + + if (!is_block_implemented(rvu->hw, BLKADDR_CPT0)) + return 0; + + err = nix_get_nixlf(rvu, req->hdr.pcifunc, &lf, &blkaddr); + if (err) + return err; + + if (req->enable) { + /* Set TT, TAG_CONST, SA_POW2_SIZE and LENM1_MAX */ + val = (u64)req->ipsec_cfg0.tt << 44 | + (u64)req->ipsec_cfg0.tag_const << 20 | + (u64)req->ipsec_cfg0.sa_pow2_size << 16 | + req->ipsec_cfg0.lenm1_max; + + if (blkaddr == BLKADDR_NIX1) + val |= BIT_ULL(46); + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_CFG0(lf), val); + + /* Set SA_IDX_W and SA_IDX_MAX */ + val = (u64)req->ipsec_cfg1.sa_idx_w << 32 | + req->ipsec_cfg1.sa_idx_max; + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_CFG1(lf), val); + + /* Set SA base address */ + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_SA_BASE(lf), + req->sa_base_addr); + } else { + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_CFG0(lf), 0x0); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_CFG1(lf), 0x0); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_SA_BASE(lf), + 0x0); + } + + return 0; +} + void rvu_nix_reset_mac(struct rvu_pfvf *pfvf, int pcifunc) { bool from_vf = !!(pcifunc & RVU_PFVF_FUNC_MASK); @@ -5133,6 +6358,7 @@ int rvu_nix_setup_ratelimit_aggr(struct rvu *rvu, u16 pcifunc, aq_req.ctype = NIX_AQ_CTYPE_BANDPROF; aq_req.op = NIX_AQ_INSTOP_WRITE; memcpy(&aq_req.prof, &aq_rsp.prof, sizeof(struct nix_bandprof_s)); + memset((char *)&aq_req.prof_mask, 0xff, sizeof(struct nix_bandprof_s)); /* Clear higher layer enable bit in the mid profile, just in case */ aq_req.prof.hl_en = 0; aq_req.prof_mask.hl_en = 1; @@ -5248,3 +6474,572 @@ int rvu_mbox_handler_nix_bandprof_get_hwinfo(struct rvu *rvu, struct msg_req *re return 0; } + +int rvu_mbox_handler_nix_rx_sw_sync(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + 
int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, req->hdr.pcifunc); + if (blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + nix_rx_sync(rvu, blkaddr); + return 0; +} + +bool rvu_nix_is_ptp_tx_enabled(struct rvu *rvu, u16 pcifunc) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkaddr; + int nixlf; + u64 cfg; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + nixlf = rvu_get_lf(rvu, block, pcifunc, 0); + if (nixlf < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LFX_TX_CFG(nixlf)); + return (cfg & BIT_ULL(32)); +} + +static inline void +configure_rq_mask(struct rvu *rvu, int blkaddr, int nixlf, + u8 rq_mask, bool enable) +{ + u64 cfg; + u64 reg; + + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_CFG1(nixlf)); + reg = rvu_read64(rvu, blkaddr, NIX_AF_LFX_CFG(nixlf)); + if (enable) { + cfg |= BIT_ULL(43); + reg = (reg & ~GENMASK_ULL(36, 35)) | ((u64)rq_mask << 35); + } else { + cfg &= ~BIT_ULL(43); + reg = (reg & ~GENMASK_ULL(36, 35)); + } + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_CFG1(nixlf), cfg); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CFG(nixlf), reg); +} + +static inline void +configure_spb_cpt(struct rvu *rvu, int blkaddr, int nixlf, + struct nix_rq_cpt_field_mask_cfg_req *req, bool enable) +{ + u64 cfg; + + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_CFG1(nixlf)); + if (enable) { + cfg |= BIT_ULL(37); + cfg &= ~GENMASK_ULL(42, 38); + cfg |= ((u64)req->ipsec_cfg1.spb_cpt_sizem1 << 38); + cfg &= ~GENMASK_ULL(63, 44); + cfg |= ((u64)req->ipsec_cfg1.spb_cpt_aura << 44); + } else { + cfg &= ~BIT_ULL(37); + cfg &= ~GENMASK_ULL(42, 38); + cfg &= ~GENMASK_ULL(63, 44); + } + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_CFG1(nixlf), cfg); +} + +static +int nix_inline_rq_mask_alloc(struct rvu *rvu, + struct nix_rq_cpt_field_mask_cfg_req *req, + struct nix_hw *nix_hw, int blkaddr) +{ + u8 
rq_cpt_mask_select; + u64 reg_mask; + u64 reg_set; + int idx, rq_idx; + + for (idx = 0; idx < nix_hw->rq_msk.in_use; idx++) { + for (rq_idx = 0; rq_idx < RQ_CTX_MASK_MAX; rq_idx++) { + reg_mask = rvu_read64(rvu, blkaddr, + NIX_AF_RX_RQX_MASKX(idx, rq_idx)); + reg_set = rvu_read64(rvu, blkaddr, + NIX_AF_RX_RQX_SETX(idx, rq_idx)); + if (reg_mask != req->rq_ctx_word_mask[rq_idx] || + reg_set != req->rq_ctx_word_set[rq_idx]) + break; + } + if (rq_idx == RQ_CTX_MASK_MAX) + break; + } + + if (idx < nix_hw->rq_msk.in_use) { + /* Match found */ + rq_cpt_mask_select = idx; + return idx; + } + + if (nix_hw->rq_msk.in_use == nix_hw->rq_msk.total) + return NIX_AF_ERR_RQ_CPT_MASK; + + rq_cpt_mask_select = nix_hw->rq_msk.in_use++; + + for (rq_idx = 0; rq_idx < RQ_CTX_MASK_MAX; rq_idx++) { + rvu_write64(rvu, blkaddr, + NIX_AF_RX_RQX_MASKX(rq_cpt_mask_select, rq_idx), + req->rq_ctx_word_mask[rq_idx]); + rvu_write64(rvu, blkaddr, + NIX_AF_RX_RQX_SETX(rq_cpt_mask_select, rq_idx), + req->rq_ctx_word_set[rq_idx]); + } + + return rq_cpt_mask_select; +} + +int rvu_mbox_handler_nix_lf_inline_rq_cfg(struct rvu *rvu, + struct nix_rq_cpt_field_mask_cfg_req *req, + struct msg_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct nix_hw *nix_hw; + int blkaddr, nixlf; + int rq_mask, err; + + err = nix_get_nixlf(rvu, req->hdr.pcifunc, &nixlf, &blkaddr); + if (err) + return err; + + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return NIX_AF_ERR_INVALID_NIXBLK; + + if (!hw->cap.second_cpt_pass) + return NIX_AF_ERR_INVALID_NIXBLK; + + if (req->ipsec_cfg1.rq_mask_enable) { + rq_mask = nix_inline_rq_mask_alloc(rvu, req, nix_hw, blkaddr); + if (rq_mask < 0) + return NIX_AF_ERR_RQ_CPT_MASK; + } + + configure_rq_mask(rvu, blkaddr, nixlf, rq_mask, + req->ipsec_cfg1.rq_mask_enable); + configure_spb_cpt(rvu, blkaddr, nixlf, req, + req->ipsec_cfg1.spb_cpt_enable); + return 0; +} + +int rvu_mbox_handler_nix_set_vlan_tpid(struct rvu *rvu, + struct nix_set_vlan_tpid *req, + struct msg_rsp *rsp) 
+{ + u16 pcifunc = req->hdr.pcifunc; + int nixlf, err, blkaddr; + u64 cfg; + + err = nix_get_nixlf(rvu, pcifunc, &nixlf, &blkaddr); + if (err) + return err; + + if (req->vlan_type != NIX_VLAN_TYPE_OUTER && + req->vlan_type != NIX_VLAN_TYPE_INNER) + return NIX_AF_ERR_PARAM; + + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LFX_TX_CFG(nixlf)); + + if (req->vlan_type == NIX_VLAN_TYPE_OUTER) + cfg = (cfg & ~GENMASK_ULL(15, 0)) | req->tpid; + else + cfg = (cfg & ~GENMASK_ULL(31, 16)) | ((u64)req->tpid << 16); + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_CFG(nixlf), cfg); + return 0; +} + +int rvu_mbox_handler_nix_tl1_rr_prio(struct rvu *rvu, + struct nix_tl1_rr_prio_req *req, + struct msg_rsp *rsp) +{ + u16 pcifunc = req->hdr.pcifunc; + int blkaddr, nixlf, schq, err; + struct rvu_pfvf *pfvf; + u16 regval; + + err = nix_get_nixlf(rvu, pcifunc, &nixlf, &blkaddr); + if (err) + return err; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + /* Only PF is allowed */ + if (is_vf(pcifunc)) + return NIX_AF_ERR_TL1_RR_PRIO_PERM_DENIED; + + pfvf->tl1_rr_prio = req->tl1_rr_prio; + + /* update TL1 topology */ + schq = nix_get_tx_link(rvu, pcifunc); + regval = rvu_read64(rvu, blkaddr, NIX_AF_TL1X_TOPOLOGY(schq)); + regval &= ~GENMASK_ULL(4, 1); + regval |= pfvf->tl1_rr_prio << 1; + rvu_write64(rvu, blkaddr, NIX_AF_TL1X_TOPOLOGY(schq), regval); + + return 0; +} + +static struct nix_mcast_grp_elem *rvu_nix_mcast_find_grp_elem(struct nix_mcast_grp *mcast_grp, + u32 mcast_grp_idx) +{ + struct nix_mcast_grp_elem *iter; + bool is_found = false; + + list_for_each_entry(iter, &mcast_grp->mcast_grp_head, list) { + if (iter->mcast_grp_idx == mcast_grp_idx) { + is_found = true; + break; + } + } + + if (is_found) + return iter; + + return NULL; +} + +int rvu_nix_mcast_get_mce_index(struct rvu *rvu, u16 pcifunc, u32 mcast_grp_idx) +{ + struct nix_mcast_grp_elem *elem; + struct nix_mcast_grp *mcast_grp; + struct nix_hw *nix_hw; + int blkaddr, ret; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + nix_hw = 
get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return NIX_AF_ERR_INVALID_NIXBLK; + + mcast_grp = &nix_hw->mcast_grp; + mutex_lock(&mcast_grp->mcast_grp_lock); + elem = rvu_nix_mcast_find_grp_elem(mcast_grp, mcast_grp_idx); + if (!elem) + ret = NIX_AF_ERR_INVALID_MCAST_GRP; + else + ret = elem->mce_start_index; + + mutex_unlock(&mcast_grp->mcast_grp_lock); + return ret; +} + +void rvu_nix_mcast_flr_free_entries(struct rvu *rvu, u16 pcifunc) +{ + struct nix_mcast_grp_destroy_req dreq = { 0 }; + struct nix_mcast_grp_update_req ureq = { 0 }; + struct nix_mcast_grp_update_rsp ursp = { 0 }; + struct nix_mcast_grp_elem *elem, *tmp; + struct nix_mcast_grp *mcast_grp; + struct nix_hw *nix_hw; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return; + + mcast_grp = &nix_hw->mcast_grp; + + mutex_lock(&mcast_grp->mcast_grp_lock); + list_for_each_entry_safe(elem, tmp, &mcast_grp->mcast_grp_head, list) { + struct nix_mce_list *mce_list; + struct hlist_node *tmp; + struct mce *mce; + + /* If the pcifunc which created the multicast/mirror + * group received an FLR, then delete the entire group. + */ + if (elem->pcifunc == pcifunc) { + /* Delete group */ + dreq.hdr.pcifunc = elem->pcifunc; + dreq.mcast_grp_idx = elem->mcast_grp_idx; + dreq.is_af = 1; + rvu_mbox_handler_nix_mcast_grp_destroy(rvu, &dreq, NULL); + continue; + } + + /* Iterate the group elements and delete the element which + * received the FLR. 
+ */ + mce_list = &elem->mcast_mce_list; + hlist_for_each_entry_safe(mce, tmp, &mce_list->head, node) { + if (mce->pcifunc == pcifunc) { + ureq.hdr.pcifunc = pcifunc; + ureq.num_mce_entry = 1; + ureq.mcast_grp_idx = elem->mcast_grp_idx; + ureq.op = NIX_MCAST_OP_DEL_ENTRY; + ureq.pcifunc[0] = pcifunc; + ureq.is_af = 1; + rvu_mbox_handler_nix_mcast_grp_update(rvu, &ureq, &ursp); + break; + } + } + } + mutex_unlock(&mcast_grp->mcast_grp_lock); +} + +int rvu_nix_mcast_update_mcam_entry(struct rvu *rvu, u16 pcifunc, + u32 mcast_grp_idx, u16 mcam_index) +{ + struct nix_mcast_grp_elem *elem; + struct nix_mcast_grp *mcast_grp; + struct nix_hw *nix_hw; + int blkaddr, ret = 0; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return NIX_AF_ERR_INVALID_NIXBLK; + + mcast_grp = &nix_hw->mcast_grp; + mutex_lock(&mcast_grp->mcast_grp_lock); + elem = rvu_nix_mcast_find_grp_elem(mcast_grp, mcast_grp_idx); + if (!elem) + ret = NIX_AF_ERR_INVALID_MCAST_GRP; + else + elem->mcam_index = mcam_index; + + mutex_unlock(&mcast_grp->mcast_grp_lock); + return ret; +} + +int rvu_mbox_handler_nix_mcast_grp_create(struct rvu *rvu, + struct nix_mcast_grp_create_req *req, + struct nix_mcast_grp_create_rsp *rsp) +{ + struct nix_mcast_grp_elem *elem; + struct nix_mcast_grp *mcast_grp; + struct nix_hw *nix_hw; + int blkaddr, err; + + err = nix_get_struct_ptrs(rvu, req->hdr.pcifunc, &nix_hw, &blkaddr); + if (err) + return err; + + mcast_grp = &nix_hw->mcast_grp; + elem = kzalloc(sizeof(*elem), GFP_KERNEL); + if (!elem) + return -ENOMEM; + + INIT_HLIST_HEAD(&elem->mcast_mce_list.head); + elem->mcam_index = -1; + elem->mce_start_index = -1; + elem->pcifunc = req->hdr.pcifunc; + elem->dir = req->dir; + elem->mcast_grp_idx = mcast_grp->next_grp_index++; + + mutex_lock(&mcast_grp->mcast_grp_lock); + list_add_tail(&elem->list, &mcast_grp->mcast_grp_head); + mcast_grp->count++; + mutex_unlock(&mcast_grp->mcast_grp_lock); + + rsp->mcast_grp_idx 
= elem->mcast_grp_idx; + return 0; +} + +int rvu_mbox_handler_nix_mcast_grp_destroy(struct rvu *rvu, + struct nix_mcast_grp_destroy_req *req, + struct msg_rsp *rsp) +{ + struct npc_delete_flow_req uninstall_req = { 0 }; + struct npc_delete_flow_rsp uninstall_rsp = { 0 }; + struct nix_mcast_grp_elem *elem; + struct nix_mcast_grp *mcast_grp; + int blkaddr, err, ret = 0; + struct nix_mcast *mcast; + struct nix_hw *nix_hw; + + err = nix_get_struct_ptrs(rvu, req->hdr.pcifunc, &nix_hw, &blkaddr); + if (err) + return err; + + mcast_grp = &nix_hw->mcast_grp; + + /* If AF is requesting for the deletion, + * then AF is already taking the lock + */ + if (!req->is_af) + mutex_lock(&mcast_grp->mcast_grp_lock); + + elem = rvu_nix_mcast_find_grp_elem(mcast_grp, req->mcast_grp_idx); + if (!elem) { + ret = NIX_AF_ERR_INVALID_MCAST_GRP; + goto unlock_grp; + } + + /* If no mce entries are associated with the group + * then just remove it from the global list. + */ + if (!elem->mcast_mce_list.count) + goto delete_grp; + + /* Delete the associated mcam entry and + * remove all mce entries from the group + */ + mcast = &nix_hw->mcast; + mutex_lock(&mcast->mce_lock); + if (elem->mcam_index != -1) { + uninstall_req.hdr.pcifunc = req->hdr.pcifunc; + uninstall_req.entry = elem->mcam_index; + rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &uninstall_rsp); + } + + nix_free_mce_list(mcast, elem->mcast_mce_list.count, + elem->mce_start_index, elem->dir); + nix_delete_mcast_mce_list(&elem->mcast_mce_list); + mutex_unlock(&mcast->mce_lock); + +delete_grp: + list_del(&elem->list); + kfree(elem); + mcast_grp->count--; + +unlock_grp: + if (!req->is_af) + mutex_unlock(&mcast_grp->mcast_grp_lock); + + return ret; +} + +int rvu_mbox_handler_nix_mcast_grp_update(struct rvu *rvu, + struct nix_mcast_grp_update_req *req, + struct nix_mcast_grp_update_rsp *rsp) +{ + struct nix_mcast_grp_destroy_req dreq = { 0 }; + struct npc_mcam *mcam = &rvu->hw->mcam; + struct nix_mcast_grp_elem *elem; + struct 
nix_mcast_grp *mcast_grp; + int blkaddr, err, npc_blkaddr; + u16 prev_count, new_count; + struct nix_mcast *mcast; + struct nix_hw *nix_hw; + int i, ret; + + if (!req->num_mce_entry) + return 0; + + err = nix_get_struct_ptrs(rvu, req->hdr.pcifunc, &nix_hw, &blkaddr); + if (err) + return err; + + mcast_grp = &nix_hw->mcast_grp; + + /* If AF is requesting for the updation, + * then AF is already taking the lock. + */ + if (!req->is_af) + mutex_lock(&mcast_grp->mcast_grp_lock); + + elem = rvu_nix_mcast_find_grp_elem(mcast_grp, req->mcast_grp_idx); + if (!elem) { + ret = NIX_AF_ERR_INVALID_MCAST_GRP; + goto unlock_grp; + } + + /* If any pcifunc matches the group's pcifunc, then we can + * delete the entire group. + */ + if (req->op == NIX_MCAST_OP_DEL_ENTRY) { + for (i = 0; i < req->num_mce_entry; i++) { + if (elem->pcifunc == req->pcifunc[i]) { + /* Delete group */ + dreq.hdr.pcifunc = elem->pcifunc; + dreq.mcast_grp_idx = elem->mcast_grp_idx; + dreq.is_af = 1; + rvu_mbox_handler_nix_mcast_grp_destroy(rvu, &dreq, NULL); + ret = 0; + goto unlock_grp; + } + } + } + + mcast = &nix_hw->mcast; + mutex_lock(&mcast->mce_lock); + npc_blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (elem->mcam_index != -1) + npc_enable_mcam_entry(rvu, mcam, npc_blkaddr, elem->mcam_index, false); + + prev_count = elem->mcast_mce_list.count; + if (req->op == NIX_MCAST_OP_ADD_ENTRY) { + new_count = prev_count + req->num_mce_entry; + if (prev_count) + nix_free_mce_list(mcast, prev_count, elem->mce_start_index, elem->dir); + + elem->mce_start_index = nix_alloc_mce_list(mcast, new_count, elem->dir); + + /* It is possible not to get contiguous memory */ + if (elem->mce_start_index < 0) { + if (elem->mcam_index != -1) { + npc_enable_mcam_entry(rvu, mcam, npc_blkaddr, + elem->mcam_index, true); + ret = NIX_AF_ERR_NON_CONTIG_MCE_LIST; + goto unlock_mce; + } + } + + ret = nix_add_mce_list_entry(rvu, nix_hw, elem, req); + if (ret) { + nix_free_mce_list(mcast, new_count, elem->mce_start_index, 
elem->dir); + if (prev_count) + elem->mce_start_index = nix_alloc_mce_list(mcast, + prev_count, + elem->dir); + + if (elem->mcam_index != -1) + npc_enable_mcam_entry(rvu, mcam, npc_blkaddr, + elem->mcam_index, true); + + goto unlock_mce; + } + } else { + if (!prev_count || prev_count < req->num_mce_entry) { + if (elem->mcam_index != -1) + npc_enable_mcam_entry(rvu, mcam, npc_blkaddr, + elem->mcam_index, true); + ret = NIX_AF_ERR_INVALID_MCAST_DEL_REQ; + goto unlock_mce; + } + + nix_free_mce_list(mcast, prev_count, elem->mce_start_index, elem->dir); + new_count = prev_count - req->num_mce_entry; + elem->mce_start_index = nix_alloc_mce_list(mcast, new_count, elem->dir); + ret = nix_del_mce_list_entry(rvu, nix_hw, elem, req); + if (ret) { + nix_free_mce_list(mcast, new_count, elem->mce_start_index, elem->dir); + elem->mce_start_index = nix_alloc_mce_list(mcast, prev_count, elem->dir); + if (elem->mcam_index != -1) + npc_enable_mcam_entry(rvu, mcam, + npc_blkaddr, + elem->mcam_index, + true); + goto unlock_mce; + } + } + + if (elem->mcam_index == -1) { + rsp->mce_start_index = elem->mce_start_index; + ret = 0; + goto unlock_mce; + } + + nix_mcast_update_action(rvu, elem); + npc_enable_mcam_entry(rvu, mcam, npc_blkaddr, elem->mcam_index, true); + rsp->mce_start_index = elem->mce_start_index; + ret = 0; + +unlock_mce: + mutex_unlock(&mcast->mce_lock); + +unlock_grp: + if (!req->is_af) + mutex_unlock(&mcast_grp->mcast_grp_lock); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 161ad2ae4019..a55cacb988ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1682,8 +1682,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, } trace_mlx5_fs_set_fte(fte, false); + /* Link newly added rules into the tree. 
*/ for (i = 0; i < handle->num_rules; i++) { - if (refcount_read(&handle->rule[i]->node.refcount) == 1) { + if (!handle->rule[i]->node.parent) { tree_add_node(&handle->rule[i]->node, &fte->node); trace_mlx5_fs_add_rule(handle->rule[i]); } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index 189a6a0a2e08..8561a7bf53e1 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c @@ -730,7 +730,7 @@ static int sparx5_port_pcs_low_set(struct sparx5 *sparx5, bool sgmii = false, inband_aneg = false; int err; - if (port->conf.inband) { + if (conf->inband) { if (conf->portmode == PHY_INTERFACE_MODE_SGMII || conf->portmode == PHY_INTERFACE_MODE_QSGMII) inband_aneg = true; /* Cisco-SGMII in-band-aneg */ @@ -947,7 +947,7 @@ int sparx5_port_pcs_set(struct sparx5 *sparx5, if (err) return -EINVAL; - if (port->conf.inband) { + if (conf->inband) { /* Enable/disable 1G counters in ASM */ spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev), ASM_PORT_CFG_CSC_STAT_DIS, diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 9569b5cc595e..0e4ea3c0fe82 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -909,7 +909,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!pskb_inet_may_pull(skb)) + if (!skb_vlan_inet_prepare(skb)) return -EINVAL; sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); @@ -1006,7 +1006,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!pskb_inet_may_pull(skb)) + if (!skb_vlan_inet_prepare(skb)) return -EINVAL; sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c index 40a03f9c2d21..ac702f74dd98 100644 --- a/drivers/scsi/qla2xxx/qla_edif.c +++ b/drivers/scsi/qla2xxx/qla_edif.c @@ -1012,7 +1012,7 @@ 
qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job) list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) { if (fcport->edif.enable) { - if (pcnt > app_req.num_ports) + if (pcnt >= app_req.num_ports) break; app_reply->elem[pcnt].rekey_count = diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 99cdd59f4e0c..061af5dc92e6 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2518,9 +2518,19 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) r = vhost_get_avail_idx(vq, &avail_idx); if (unlikely(r)) return false; + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); + if (vq->avail_idx != vq->last_avail_idx) { + /* Since we have updated avail_idx, the following + * call to vhost_get_vq_desc() will read available + * ring entries. Make sure that read happens after + * the avail_idx read. + */ + smp_rmb(); + return false; + } - return vq->avail_idx == vq->last_avail_idx; + return true; } EXPORT_SYMBOL_GPL(vhost_vq_avail_empty); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 34278cb5f964..c50cabf69415 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4080,6 +4080,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes) BTRFS_QGROUP_RSV_META_PREALLOC); trace_qgroup_meta_convert(root, num_bytes); qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes); + if (!sb_rdonly(fs_info->sb)) + add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS); } /* diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 9d276655cc25..6659d0369ec5 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -631,4 +631,11 @@ u64 dma_fence_context_alloc(unsigned num); ##args); \ } while (0) +#define DMA_FENCE_WARN(f, fmt, args...) 
\ + do { \ + struct dma_fence *__ff = (f); \ + pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\ + ##args); \ + } while (0) + #endif /* __LINUX_DMA_FENCE_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 747f40e0c326..37738ec87de3 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -133,7 +133,7 @@ do { \ # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) false -# define lockdep_hrtimer_exit(__context) do { } while (0) +# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0) # define lockdep_posixtimer_enter() do { } while (0) # define lockdep_posixtimer_exit() do { } while (0) # define lockdep_irq_work_enter(__work) do { } while (0) diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index e81856c0ba13..6a0f2097d370 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -66,7 +66,7 @@ #include <linux/seqlock.h> struct u64_stats_sync { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) seqcount_t seq; #endif }; @@ -115,7 +115,7 @@ static inline void u64_stats_inc(u64_stats_t *p) } #endif -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) #define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) #else static inline void u64_stats_init(struct u64_stats_sync *syncp) @@ -125,15 +125,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp) static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); write_seqcount_begin(&syncp->seq); #endif } static inline void u64_stats_update_end(struct 
u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) write_seqcount_end(&syncp->seq); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); #endif } @@ -142,8 +146,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) { unsigned long flags = 0; -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - local_irq_save(flags); +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); + else + local_irq_save(flags); write_seqcount_begin(&syncp->seq); #endif return flags; @@ -153,15 +160,18 @@ static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, unsigned long flags) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) write_seqcount_end(&syncp->seq); - local_irq_restore(flags); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); + else + local_irq_restore(flags); #endif } static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) return read_seqcount_begin(&syncp->seq); #else return 0; @@ -170,7 +180,7 @@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync * static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) preempt_disable(); #endif return __u64_stats_fetch_begin(syncp); @@ -179,7 +189,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && 
(defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) return read_seqcount_retry(&syncp->seq, start); #else return false; @@ -189,7 +199,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) preempt_enable(); #endif return __u64_stats_fetch_retry(syncp, start); @@ -203,7 +213,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, */ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) + preempt_disable(); +#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) local_irq_disable(); #endif return __u64_stats_fetch_begin(syncp); @@ -212,7 +224,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) + preempt_enable(); +#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) local_irq_enable(); #endif return __u64_stats_fetch_retry(syncp, start); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 700a19e0455e..5cf1a7377407 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -435,6 +435,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) refcount_inc(&ifp->refcnt); } +static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp) +{ + return refcount_inc_not_zero(&ifp->refcnt); +} /* * compute link-local solicited-node multicast address diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 32d21983c696..094afdf7dea1 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ 
-56,7 +56,7 @@ struct unix_sock { struct mutex iolock, bindlock; struct sock *peer; struct list_head link; - atomic_long_t inflight; + unsigned long inflight; spinlock_t lock; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 17ec652e8f12..eca36edb8557 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -332,6 +332,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) return pskb_network_may_pull(skb, nhlen); } +/* Variant of pskb_inet_may_pull(). + */ +static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) +{ + int nhlen = 0, maclen = ETH_HLEN; + __be16 type = skb->protocol; + + /* Essentially this is skb_protocol(skb, true) + * And we get MAC len. + */ + if (eth_type_vlan(type)) + type = __vlan_get_protocol(skb, type, &maclen); + + switch (type) { +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + nhlen = sizeof(struct ipv6hdr); + break; +#endif + case htons(ETH_P_IP): + nhlen = sizeof(struct iphdr); + break; + } + /* For ETH_P_IPV6/ETH_P_IP we make sure to pull + * a base network header in skb->head. + */ + if (!pskb_may_pull(skb, maclen + nhlen)) + return false; + + skb_set_network_header(skb, maclen); + return true; +} + static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; diff --git a/kernel/cpu.c b/kernel/cpu.c index 0e786de993e0..297579dda40a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2722,7 +2722,8 @@ enum cpu_mitigations { }; static enum cpu_mitigations cpu_mitigations __ro_after_init = - CPU_MITIGATIONS_AUTO; + IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? 
CPU_MITIGATIONS_AUTO : + CPU_MITIGATIONS_OFF; static int __init mitigations_parse_cmdline(char *arg) { diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c6bcb80785d8..2ec1473146ca 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1509,7 +1509,6 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); - local_inc(&cpu_buffer->pages_touched); /* * Just make sure we have seen our old_write and synchronize * with any interrupts that come in. @@ -1546,8 +1545,9 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, */ local_set(&next_page->page->commit, 0); - /* Again, either we update tail_page or an interrupt does */ - (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page); + /* Either we update tail_page or an interrupt does */ + if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page)) + local_inc(&cpu_buffer->pages_touched); } } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0a7348b90ba5..1f4f3096b9ac 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1645,6 +1645,7 @@ static int trace_format_open(struct inode *inode, struct file *file) return 0; } +#ifdef CONFIG_PERF_EVENTS static ssize_t event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -1659,6 +1660,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); } +#endif static ssize_t event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, @@ -2104,10 +2106,12 @@ static const struct file_operations ftrace_event_format_fops = { .release = seq_release, }; +#ifdef CONFIG_PERF_EVENTS static const struct file_operations ftrace_event_id_fops = { .read = event_id_read, .llseek = default_llseek, }; +#endif 
static const struct file_operations ftrace_event_filter_fops = { .open = tracing_open_file_tr, diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 1e1cf0e8a142..660a5594a647 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3948,7 +3948,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) spin_lock_bh(&bat_priv->tt.commit_lock); - while (true) { + while (timeout) { table_size = batadv_tt_local_table_transmit_size(bat_priv); if (packet_size_max >= table_size) break; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index c2db60ad0f1d..90392c8fe5dd 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -108,8 +108,10 @@ static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; - if (skb) + if (skb) { + kfree_skb(hdev->req_skb); hdev->req_skb = skb_get(skb); + } wake_up_interruptible(&hdev->req_wait_q); } } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 07ecb16231cd..a9d5a1973224 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -965,6 +965,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1265,6 +1267,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 1e1e7488d6bf..aee7cd584c92 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1119,6 +1119,8 @@ 
do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1505,6 +1507,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 12c59d700942..4ff94596f8cd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -933,13 +933,11 @@ void ip_rt_send_redirect(struct sk_buff *skb) icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->n_redirects; -#ifdef CONFIG_IP_ROUTE_VERBOSE - if (log_martians && + if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians && peer->n_redirects == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &gw); -#endif } out_put_peer: inet_putpeer(peer); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 968ca078191c..a17e1d744b2d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2054,9 +2054,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { - result = ifp; - in6_ifa_hold(ifp); - break; + if (in6_ifa_hold_safe(ifp)) { + result = ifp; + break; + } } } } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bbb9ed6d1ae6..c0ff5ee490e7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1375,7 +1375,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack) { struct fib6_table *table = rt->fib6_table; - struct fib6_node *fn, *pn = NULL; + struct fib6_node *fn; +#ifdef 
CONFIG_IPV6_SUBTREES + struct fib6_node *pn = NULL; +#endif int err = -ENOMEM; int allow_create = 1; int replace_required = 0; @@ -1399,9 +1402,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, goto out; } +#ifdef CONFIG_IPV6_SUBTREES pn = fn; -#ifdef CONFIG_IPV6_SUBTREES if (rt->fib6_src.plen) { struct fib6_node *sn; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index b17990d514ee..afd22ea9f555 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1137,6 +1137,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1515,6 +1517,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 7106ce231a2d..60dd6f32d520 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1704,8 +1704,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, if (ct_info.timeout[0]) { if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, ct_info.timeout)) - pr_info_ratelimited("Failed to associated timeout " - "policy `%s'\n", ct_info.timeout); + OVS_NLERR(log, + "Failed to associated timeout policy '%s'", + ct_info.timeout); else ct_info.nf_ct_timeout = rcu_dereference( nf_ct_timeout_find(ct_info.ct)->timeout); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 265dc665c92a..628d97c195a7 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -877,11 +877,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 
sk->sk_destruct = unix_sock_destructor; - u = unix_sk(sk); + u = unix_sk(sk); + u->inflight = 0; u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); - atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ @@ -2567,7 +2567,9 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, } } else if (!(flags & MSG_PEEK)) { skb_unlink(skb, &sk->sk_receive_queue); - consume_skb(skb); + WRITE_ONCE(u->oob_skb, NULL); + if (!WARN_ON_ONCE(skb_unref(skb))) + kfree_skb(skb); skb = skb_peek(&sk->sk_receive_queue); } } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 9bfffe2a7f02..85c6f05c0fa3 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), static void dec_inflight(struct unix_sock *usk) { - atomic_long_dec(&usk->inflight); + usk->inflight--; } static void inc_inflight(struct unix_sock *usk) { - atomic_long_inc(&usk->inflight); + usk->inflight++; } static void inc_inflight_move_tail(struct unix_sock *u) { - atomic_long_inc(&u->inflight); + u->inflight++; + /* If this still might be part of a cycle, move it to the end * of the list, so that it's checked even if it was already * passed over @@ -234,20 +235,34 @@ void unix_gc(void) * receive queues. Other, non candidate sockets _can_ be * added to queue, so we must make sure only to touch * candidates. + * + * Embryos, though never candidates themselves, affect which + * candidates are reachable by the garbage collector. Before + * being added to a listener's queue, an embryo may already + * receive data carrying SCM_RIGHTS, potentially making the + * passed socket a candidate that is not yet reachable by the + * collector. It becomes reachable once the embryo is + * enqueued. 
Therefore, we must ensure that no SCM-laden + * embryo appears in a (candidate) listener's queue between + * consecutive scan_children() calls. */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { + struct sock *sk = &u->sk; long total_refs; - long inflight_refs; - total_refs = file_count(u->sk.sk_socket->file); - inflight_refs = atomic_long_read(&u->inflight); + total_refs = file_count(sk->sk_socket->file); - BUG_ON(inflight_refs < 1); - BUG_ON(total_refs < inflight_refs); - if (total_refs == inflight_refs) { + BUG_ON(!u->inflight); + BUG_ON(total_refs < u->inflight); + if (total_refs == u->inflight) { list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); + + if (sk->sk_state == TCP_LISTEN) { + unix_state_lock(sk); + unix_state_unlock(sk); + } } } @@ -271,7 +286,7 @@ void unix_gc(void) /* Move cursor to after the current position. */ list_move(&cursor, &u->link); - if (atomic_long_read(&u->inflight) > 0) { + if (u->inflight) { list_move_tail(&u->link, &not_cycle_list); __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); scan_children(&u->sk, inc_inflight_move_tail, NULL); diff --git a/net/unix/scm.c b/net/unix/scm.c index d1048b4c2baa..4eff7da9f6f9 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -52,12 +52,13 @@ void unix_inflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - if (atomic_long_inc_return(&u->inflight) == 1) { + if (!u->inflight) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); } else { BUG_ON(list_empty(&u->link)); } + u->inflight++; /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } @@ -74,10 +75,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - BUG_ON(!atomic_long_read(&u->inflight)); + BUG_ON(!u->inflight); BUG_ON(list_empty(&u->link)); - if
(atomic_long_dec_and_test(&u->inflight)) + u->inflight--; + if (!u->inflight) list_del_init(&u->link); /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index e5eb5616be0c..1f61d15b3d1d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1135,6 +1135,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, struct xsk_queue **q; int entries; + if (optlen < sizeof(entries)) + return -EINVAL; if (copy_from_sockptr(&entries, optval, sizeof(entries))) return -EFAULT; diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 0ba500056e63..193a984f512c 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -66,7 +66,7 @@ static int check_diff(struct timeval start, struct timeval end) diff = end.tv_usec - start.tv_usec; diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC; - if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { + if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { printf("Diff too high: %lld..", diff); return -1; } |